15614 lines (15614 with data), 611.8 kB
{
"cells": [
{
"cell_type": "markdown",
"source": [
"# HuggingFace Installations"
],
"metadata": {
"id": "vawMUV8TT2dg"
}
},
{
"cell_type": "code",
"source": [
"!pip install datasets\n",
"!pip install transformers\n",
"!pip install seqeval"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "6yD-WetFMkwS",
"outputId": "5880d2ef-ce47-4a34-a522-5d433e689f8a"
},
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Collecting datasets\n",
" Downloading datasets-2.6.1-py3-none-any.whl (441 kB)\n",
"\u001b[K |████████████████████████████████| 441 kB 4.9 MB/s \n",
"\u001b[?25hRequirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (2.23.0)\n",
"Collecting huggingface-hub<1.0.0,>=0.2.0\n",
" Downloading huggingface_hub-0.10.1-py3-none-any.whl (163 kB)\n",
"\u001b[K |████████████████████████████████| 163 kB 84.5 MB/s \n",
"\u001b[?25hRequirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from datasets) (6.0)\n",
"Requirement already satisfied: aiohttp in /usr/local/lib/python3.7/dist-packages (from datasets) (3.8.3)\n",
"Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from datasets) (21.3)\n",
"Collecting responses<0.19\n",
" Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n",
"Requirement already satisfied: dill<0.3.6 in /usr/local/lib/python3.7/dist-packages (from datasets) (0.3.5.1)\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from datasets) (1.3.5)\n",
"Requirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.7/dist-packages (from datasets) (2022.8.2)\n",
"Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from datasets) (4.13.0)\n",
"Collecting multiprocess\n",
" Downloading multiprocess-0.70.13-py37-none-any.whl (115 kB)\n",
"\u001b[K |████████████████████████████████| 115 kB 93.2 MB/s \n",
"\u001b[?25hRequirement already satisfied: pyarrow>=6.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (6.0.1)\n",
"Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.7/dist-packages (from datasets) (4.64.1)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from datasets) (1.21.6)\n",
"Collecting xxhash\n",
" Downloading xxhash-3.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)\n",
"\u001b[K |████████████████████████████████| 212 kB 87.9 MB/s \n",
"\u001b[?25hRequirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (22.1.0)\n",
"Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (2.1.1)\n",
"Requirement already satisfied: typing-extensions>=3.7.4 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (4.1.1)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (1.8.1)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (6.0.2)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (1.3.1)\n",
"Requirement already satisfied: asynctest==0.13.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (0.13.0)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (1.2.0)\n",
"Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (4.0.2)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.2.0->datasets) (3.8.0)\n",
"Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->datasets) (3.0.9)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (2022.9.24)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (2.10)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (1.24.3)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (3.0.4)\n",
"Collecting urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1\n",
" Downloading urllib3-1.25.11-py2.py3-none-any.whl (127 kB)\n",
"\u001b[K |████████████████████████████████| 127 kB 91.7 MB/s \n",
"\u001b[?25hRequirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->datasets) (3.9.0)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2.8.2)\n",
"Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2022.4)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->datasets) (1.15.0)\n",
"Installing collected packages: urllib3, xxhash, responses, multiprocess, huggingface-hub, datasets\n",
" Attempting uninstall: urllib3\n",
" Found existing installation: urllib3 1.24.3\n",
" Uninstalling urllib3-1.24.3:\n",
" Successfully uninstalled urllib3-1.24.3\n",
"Successfully installed datasets-2.6.1 huggingface-hub-0.10.1 multiprocess-0.70.13 responses-0.18.0 urllib3-1.25.11 xxhash-3.1.0\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Collecting transformers\n",
" Downloading transformers-4.23.1-py3-none-any.whl (5.3 MB)\n",
"\u001b[K |████████████████████████████████| 5.3 MB 5.0 MB/s \n",
"\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.8.0)\n",
"Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.13.0)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.64.1)\n",
"Collecting tokenizers!=0.11.3,<0.14,>=0.11.1\n",
" Downloading tokenizers-0.13.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)\n",
"\u001b[K |████████████████████████████████| 7.6 MB 71.4 MB/s \n",
"\u001b[?25hRequirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (6.0)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.21.6)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (21.3)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.10.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.10.1)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2022.6.2)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.10.0->transformers) (4.1.1)\n",
"Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers) (3.0.9)\n",
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.9.0)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.25.11)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2022.9.24)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n",
"Installing collected packages: tokenizers, transformers\n",
"Successfully installed tokenizers-0.13.1 transformers-4.23.1\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Collecting seqeval\n",
" Downloading seqeval-1.2.2.tar.gz (43 kB)\n",
"\u001b[K |████████████████████████████████| 43 kB 1.4 MB/s \n",
"\u001b[?25hRequirement already satisfied: numpy>=1.14.0 in /usr/local/lib/python3.7/dist-packages (from seqeval) (1.21.6)\n",
"Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.7/dist-packages (from seqeval) (1.0.2)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.21.3->seqeval) (3.1.0)\n",
"Requirement already satisfied: scipy>=1.1.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.7.3)\n",
"Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.2.0)\n",
"Building wheels for collected packages: seqeval\n",
" Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16180 sha256=23a3a3ae907d4e838c06fc140623ceabf5f6a01792b4d0fc8b0bc856e17c3e53\n",
" Stored in directory: /root/.cache/pip/wheels/05/96/ee/7cac4e74f3b19e3158dce26a20a1c86b3533c43ec72a549fd7\n",
"Successfully built seqeval\n",
"Installing collected packages: seqeval\n",
"Successfully installed seqeval-1.2.2\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"id": "2LEFwSmbKpLP"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import spacy\n",
"import tqdm\n",
"import sys\n",
"from datasets import Dataset, DatasetDict\n",
"from transformers import Trainer\n",
"from transformers import AutoModelForTokenClassification\n",
"from transformers import AutoTokenizer\n",
"from transformers import TrainingArguments\n",
"from transformers import DataCollatorForTokenClassification\n",
"from datasets import load_metric\n",
"from transformers import pipeline\n",
"from transformers import EarlyStoppingCallback, IntervalStrategy"
]
},
{
"cell_type": "code",
"source": [
"from google.colab import drive\n",
"drive.mount('/content/drive')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "G7DLsSK319zN",
"outputId": "6d759f8e-521e-4fcb-c859-0c4d4f620e58"
},
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Mounted at /content/drive\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"%cd /content/drive/MyDrive/IRE"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "czuvYXJp17x-",
"outputId": "c5fa02b9-9703-4ba6-8fe6-baa9c367f653"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/content/drive/MyDrive/IRE\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "JP_EFEuNKpLT"
},
"source": [
"# Loading Data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "V-_Zs-ZGKpLV",
"outputId": "aea55c01-65dd-4e58-916a-92e83b90ee95"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" filename mark label offset1 offset2 \\\n",
"0 es-S0212-71992007000100007-1 T1 ENFERMEDAD 40 61 \n",
"1 es-S0212-71992007000100007-1 T2 ENFERMEDAD 66 79 \n",
"2 es-S0212-71992007000100007-1 T3 ENFERMEDAD 1682 1698 \n",
"3 es-S0212-71992007000100007-1 T4 ENFERMEDAD 1859 1875 \n",
"4 es-S0212-71992007000100007-1 T5 ENFERMEDAD 1626 1648 \n",
"\n",
" span code \n",
"0 arterial hypertension 38341003 \n",
"1 polyarthrosis 36186002 \n",
"2 pleural effusion 60046008 \n",
"3 pleural effusion 60046008 \n",
"4 lower lobe atelectasis 46621007 "
],
"text/html": [
"\n",
" <div id=\"df-d46a4230-7e8d-49af-b801-224b9699bd0e\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>filename</th>\n",
" <th>mark</th>\n",
" <th>label</th>\n",
" <th>offset1</th>\n",
" <th>offset2</th>\n",
" <th>span</th>\n",
" <th>code</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>es-S0212-71992007000100007-1</td>\n",
" <td>T1</td>\n",
" <td>ENFERMEDAD</td>\n",
" <td>40</td>\n",
" <td>61</td>\n",
" <td>arterial hypertension</td>\n",
" <td>38341003</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>es-S0212-71992007000100007-1</td>\n",
" <td>T2</td>\n",
" <td>ENFERMEDAD</td>\n",
" <td>66</td>\n",
" <td>79</td>\n",
" <td>polyarthrosis</td>\n",
" <td>36186002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>es-S0212-71992007000100007-1</td>\n",
" <td>T3</td>\n",
" <td>ENFERMEDAD</td>\n",
" <td>1682</td>\n",
" <td>1698</td>\n",
" <td>pleural effusion</td>\n",
" <td>60046008</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>es-S0212-71992007000100007-1</td>\n",
" <td>T4</td>\n",
" <td>ENFERMEDAD</td>\n",
" <td>1859</td>\n",
" <td>1875</td>\n",
" <td>pleural effusion</td>\n",
" <td>60046008</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>es-S0212-71992007000100007-1</td>\n",
" <td>T5</td>\n",
" <td>ENFERMEDAD</td>\n",
" <td>1626</td>\n",
" <td>1648</td>\n",
" <td>lower lobe atelectasis</td>\n",
" <td>46621007</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-d46a4230-7e8d-49af-b801-224b9699bd0e')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-d46a4230-7e8d-49af-b801-224b9699bd0e button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-d46a4230-7e8d-49af-b801-224b9699bd0e');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 5
}
],
"source": [
"entities = pd.read_csv(\"data/entities.tsv\", delimiter=\"\\t\")\n",
"entities.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"id": "9ZrFWsjmKpLW"
},
"outputs": [],
"source": [
"list_off0 = list(entities['offset1'])\n",
"list_off1 = list(entities['offset2'])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"id": "Mafcxoi4KpLW"
},
"outputs": [],
"source": [
"text_files_path = \"data/text\""
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "OFCPFj9SKpLX",
"outputId": "8946ced4-8b26-49fe-a6e4-6aa3e4c8e16d"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"A 73-year-old patient with a history of arterial hypertension and polyarthrosis presented to the emergency department with abdominal distension and pain associated with constipation and febrile fever. The symptoms had started three weeks earlier and worsened during the four days prior to admission. During this period, an upper gastrointestinal fibroendoscopy (oesophagus, stomach and duodenum) and a colonoscopy (up to the splenic angle) were performed, but no abnormalities were found.\n",
"\n",
"Physical examination revealed a low-grade fever (37.6º C), a distended abdomen, diffusely painful on palpation, tympanised on percussion, with scant borborygmi but no evidence of peritonism, pulmonary auscultation with decreased ventilation in the lower half of the right hemithorax and the onset of intense pain on palpation and percussion of the last three dorsal spinous processes.\n",
"\n",
"Analyses showed 8.2 x 109 leukocytes / L, haemoglobin 136 g / L, platelets 186 x 109 / L. Except for glycaemia (123 mg/dl), the following laboratory parameters were normal or negative: urea, creatinine, bilirubin, transaminases, gamma-glutamyltranspeptidase, sodium, potassium, chlorine, calcium, phosphorus, creatine phosphokinase, amylase, lactate dehydrogenase (LDH), proteinogram, immunoglobulin dosage, alpha-fetoprotein, CA 19 antigens. 9 and CA 125 antigens, as well as general urinalysis. ESR and C-reactive protein were elevated, with values of 85 mm / 1 h and 133 mg / L (normal < 5 mg / L), respectively. Mantoux intradermal reaction (10 IU RT-23) was positive, with an induration of 25 mm. Chest X-ray showed an image compatible with right lower lobe atelectasis in the context of an ipsilateral pleural effusion. There were no signs suggestive of adenopathy or alterations in the cardiopericardial silhouette. A thoracoabdominal CT scan confirmed the existence of a right pleural effusion and identified prominent degenerative changes along the dorsolumbar spine but, above all, erosions in the vertebral plates adjacent to the D10-D11 disc space. A lumbar MRI showed hyposignal on T1-weighted sequences and hypersignal on T2-weighted sequences in these vertebrae and their corresponding disc, with morphological alterations typical of infectious spondylodiscitis D10-D11. Three serial blood cultures were negative. Samples obtained by aspiration of the D10-D11 space showed gram-positive cocci chains, which were subsequently recovered and typed as penicillin-sensitive Streptococcus pneumoniae. Pleural fluid analysis showed pH: 7.55; leucocytes: 8.4 x 109/L (58% neutrophils, 26% eosinophils, 16% lymphocytes), protein: 48 g/L (ratio to serum protein: 0.65), glucose: 125 mg/dl, ADA: 25.92 IU/ml, LDH: 362 U/L (pleural LDH/serum LDH ratio: 0.8). Both auramine-rhodamine staining and Löwenstein-Jensen medium culture of pleural fluid were negative and cytology showed no evidence of neoplastic cells.\n",
"\n",
"\n",
"\n",
"The patient was initially treated intravenously with amoxicillin + clavulanic acid (1 g / 200 mg, every 8 hours). After 21 days, she was switched to the oral route (875 / 125 mg, every 8 hours) for 6 weeks. The evolution was favourable and she was able to start walking with a dorsolumbar corset after the fourth week. One month after the end of antibiotic therapy, a control chest CT scan still showed a discrete pleural effusion, but the patient had only mild mechanical dorsalgia, her ESR had decreased to 21 mm / 1 h and her CRP was 2.4 mg/L. Outpatient follow-up continued for a further three years, during which time the evolution was favourable and a D10-D11 vertebral block was formed.\n",
"\n",
"\n",
"\n",
"\n",
"\n"
]
}
],
"source": [
"f = open(text_files_path + \"/\" + entities.iloc[1,0] + \".txt\", \"r\", encoding=\"UTF-8\")\n",
"for l in f:\n",
" print(l)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "UeML6HJmKpLX",
"outputId": "de8c52d4-49f7-441b-89af-cce4183b18e9"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"100%|██████████| 6650/6650 [01:51<00:00, 59.70it/s]\n"
]
}
],
"source": [
"#Clinical cases\n",
"HCs = {}\n",
"for fid in tqdm.tqdm(range(len(entities[\"filename\"]))):\n",
" fname = entities[\"filename\"][fid]\n",
" with open(text_files_path + \"/\" + fname + \".txt\", \"r\", encoding=\"UTF-8\") as f:\n",
" HCs.update({fname: f.read()})"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"id": "nivRpUrBKpLY"
},
"outputs": [],
"source": [
"#Diseases\n",
"ENF = {}\n",
"enfermedades = []\n",
"fn = entities[\"filename\"][0]\n",
"for fname, enf in zip(entities[\"filename\"], entities[\"span\"]):\n",
" if fname!=fn:\n",
" enfermedades = []\n",
" enfermedades.append(enf)\n",
" ENF.update({fname: enfermedades})\n",
" fn = fname"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "nYI19JFtKpLY",
"outputId": "68cd794c-7b51-488f-ccf8-af9e574794cb"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"741"
]
},
"metadata": {},
"execution_count": 11
}
],
"source": [
"len(ENF)"
]
},
{
"cell_type": "markdown",
"source": [
"# Preprocessing"
],
"metadata": {
"id": "qviw-SCgUDwK"
}
},
{
"cell_type": "code",
"source": [
"!python -m spacy download en_core_web_sm"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "xtkwMJhYNP17",
"outputId": "4c9fdacb-c17f-4bef-909d-fe6b1117dc06"
},
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Collecting en-core-web-sm==3.4.1\n",
" Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.1/en_core_web_sm-3.4.1-py3-none-any.whl (12.8 MB)\n",
"\u001b[K |████████████████████████████████| 12.8 MB 2.6 MB/s \n",
"\u001b[?25hRequirement already satisfied: spacy<3.5.0,>=3.4.0 in /usr/local/lib/python3.7/dist-packages (from en-core-web-sm==3.4.1) (3.4.1)\n",
"Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (2.0.8)\n",
"Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (4.64.1)\n",
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (2.0.7)\n",
"Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (2.4.4)\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (57.4.0)\n",
"Requirement already satisfied: pathy>=0.3.5 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (0.6.2)\n",
"Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (2.23.0)\n",
"Requirement already satisfied: thinc<8.2.0,>=8.1.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (8.1.4)\n",
"Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.9 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (3.0.10)\n",
"Requirement already satisfied: wasabi<1.1.0,>=0.9.1 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (0.10.1)\n",
"Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (3.0.8)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (2.11.3)\n",
"Requirement already satisfied: typing-extensions<4.2.0,>=3.7.4 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (4.1.1)\n",
"Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (3.3.0)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (21.3)\n",
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (1.0.9)\n",
"Requirement already satisfied: pydantic!=1.8,!=1.8.1,<1.10.0,>=1.7.4 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (1.9.2)\n",
"Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (1.0.3)\n",
"Requirement already satisfied: typer<0.5.0,>=0.3.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (0.4.2)\n",
"Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.7/dist-packages (from spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (1.21.6)\n",
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from catalogue<2.1.0,>=2.0.6->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (3.9.0)\n",
"Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (3.0.9)\n",
"Requirement already satisfied: smart-open<6.0.0,>=5.2.1 in /usr/local/lib/python3.7/dist-packages (from pathy>=0.3.5->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (5.2.1)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (2.10)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (2022.9.24)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (1.25.11)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (3.0.4)\n",
"Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.7/dist-packages (from thinc<8.2.0,>=8.1.0->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (0.0.3)\n",
"Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.7/dist-packages (from thinc<8.2.0,>=8.1.0->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (0.7.8)\n",
"Requirement already satisfied: click<9.0.0,>=7.1.1 in /usr/local/lib/python3.7/dist-packages (from typer<0.5.0,>=0.3.0->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (7.1.2)\n",
"Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from jinja2->spacy<3.5.0,>=3.4.0->en-core-web-sm==3.4.1) (2.0.1)\n",
"Installing collected packages: en-core-web-sm\n",
" Attempting uninstall: en-core-web-sm\n",
" Found existing installation: en-core-web-sm 3.4.0\n",
" Uninstalling en-core-web-sm-3.4.0:\n",
" Successfully uninstalled en-core-web-sm-3.4.0\n",
"Successfully installed en-core-web-sm-3.4.1\n",
"\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
"You can now load the package via spacy.load('en_core_web_sm')\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"id": "6at1DC6zKpLZ"
},
"outputs": [],
"source": [
"nlp = spacy.load(\"en_core_web_sm\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"id": "r-OrghwTKpLZ"
},
"outputs": [],
"source": [
"HCs_tokenized = []\n",
"for hc in HCs:\n",
" hl = []\n",
" tokens = nlp(HCs[hc])\n",
" #tokens = HCs[hc].split(\" \") #The simplest option\n",
" for t in tokens:\n",
" hl.append(str(t))\n",
" HCs_tokenized.append(hl)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "c4Nh9SJ9KpLa",
"outputId": "4cf4dd3e-e57b-4f52-98d3-ba2fc55632ab"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"741"
]
},
"metadata": {},
"execution_count": 15
}
],
"source": [
"len(HCs_tokenized)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"id": "mJkuhsnTKpLa"
},
"outputs": [],
"source": [
"Ent_tokenized = []\n",
"for enf in ENF:\n",
" Tks = []\n",
" for e in ENF[enf]:\n",
" sl = []\n",
" tokens = nlp(e)\n",
" #tokens = e.split(\" \")\n",
" for t in tokens:\n",
" sl.append(str(t))\n",
" Tks.append(sl)\n",
" Ent_tokenized.append(Tks)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "195X74buKpLa",
"outputId": "ad159ed3-e174-4048-9d30-9a92e94dd653"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"741"
]
},
"metadata": {},
"execution_count": 17
}
],
"source": [
"len(Ent_tokenized)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "mtLgqICDKpLb",
"outputId": "d6befaf5-1174-4e9e-8cb1-768df6a768ac"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[['arterial', 'hypertension'],\n",
" ['polyarthrosis'],\n",
" ['pleural', 'effusion'],\n",
" ['pleural', 'effusion'],\n",
" ['lower', 'lobe', 'atelectasis'],\n",
" ['infectious', 'spondylodiscitis', 'D10', '-', 'D11'],\n",
" ['pleural', 'effusion']]"
]
},
"metadata": {},
"execution_count": 18
}
],
"source": [
"Ent_tokenized[0]"
]
},
{
"cell_type": "markdown",
"source": [
"# Tagging Data with BIO scheme"
],
"metadata": {
"id": "ZapndudTUQvP"
}
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"id": "O94wXbu4KpLb"
},
"outputs": [],
"source": [
"def find_idx(list_to_check, item_to_find):\n",
" indices = []\n",
" for idx, value in enumerate(list_to_check):\n",
" if value == item_to_find:\n",
" indices.append(idx)\n",
" return indices"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "xy-dmZodKpLb",
"outputId": "d34b67ea-5d01-4120-9cdb-4801dc7a8108"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['A', '43', '-', 'year', '-', 'old', 'man', 'was', 'admitted', 'to', 'the', 'emergency', 'department', 'due', 'to', 'sudden', 'left', 'lumbar', 'pain', ',', 'continuous', 'and', 'incapacitating', ',', 'without', 'antalgic', 'position', 'or', 'aggravating', 'factors', ',', 'without', 'irradiation', ',', 'with', 'approximately', '23', 'hours', 'of', 'evolution', '.', 'No', 'nausea', 'or', 'vomiting', ',', 'no', 'macroscopic', 'haematuria', 'or', 'lower', 'urinary', 'tract', 'discomfort', '.', 'Absence', 'of', 'precordial', 'pain', '.', 'Hypertension', 'controlled', 'with', 'verapamil', '.', 'He', 'reported', 'an', 'episode', ',', 'interpreted', 'as', 'a', 'transient', 'ischaemic', 'attack', ',', 'approximately', 'eight', 'weeks', 'earlier', '(', 'not', 'confirmed', ')', '.', 'No', 'history', 'of', 'cardiac', 'arrhythmia', 'or', 'valvular', 'heart', 'disease', '.', 'No', 'other', 'previous', 'thromboembolic', 'episodes', '.', 'No', 'known', 'history', 'of', 'urinary', 'lithiasis', '.', 'No', 'osteoarticular', 'or', 'respiratory', 'complaints', '.', 'No', 'cocaine', 'abuse', '.', 'No', 'history', 'of', 'hepatitis', 'B', 'or', 'C.', 'Medicated', 'with', '160', 'mg', '/', 'day', 'of', 'verapamil', '.', '\\n', 'Physical', 'examination', 'showed', 'diaphoretic', ',', 'BP', '150', '/', '110', 'mmHg', ',', '80', 'beats', 'per', 'minute', ',', 'rhythmic', 'and', 'wide', '.', 'Temperature', '37.8', 'ºC.', 'Abdomen', 'painful', 'on', 'deep', 'palpation', 'in', 'the', 'left', 'iliac', 'fossa', 'and', 'flank', ',', 'with', 'defence', ',', 'with', 'no', 'signs', 'of', 'peritoneal', 'irritation', '.', 'Decreased', 'RHA', '.', 'No', 'abdominal', 'murmurs', '.', 'Negative', 'bilateral', 'renal', 'Murphy', '.', 'Existence', 'of', 'symmetrical', 'arterial', 'pulses', '.', 'No', 'perfusion', 'deficit', 'in', 'the', 'extremities', '.', 'General', 'neurological', 'examination', 'without', 'alterations', '.', '\\n', 'Renal', 'ultrasound', 'showed', 'no', 'abnormalities', ',', 'especially', 'dilatation', 'of', 'the', 'urinary', 'tract', '.', 'Laboratory', 'tests', ':', 'Hb15.6', 'g', '/', 'dL', ',', 'Leuc', '13,800/µL', ',', 'Neut', '76.1', '%', ',', 'Creat', '1.4', 'mg', '/', 'dL', ',', 'TGO', '104', 'UI', '/', 'L', ',', 'TGP', '74', 'UI', '/', 'L', ',', 'LDH', '1,890', 'UI', '/', 'L.', 'Coagulation', 'parameters', 'showed', 'no', 'alterations', '.', 'The', 'ECG', 'showed', 'sinus', 'rhythm', ',', 'with', 'no', 'alterations', 'compatible', 'with', 'acute', 'myocardial', 'ischaemia', '.', 'An', 'abdominal', 'and', 'pelvic', 'CT', 'scan', 'was', 'requested', ',', 'which', 'showed', 'the', 'presence', 'of', 'multiple', 'areas', 'without', 'contrast', 'uptake', 'in', 'the', 'left', 'kidney', ',', 'without', 'morpho', '-', 'structural', 'alterations', ',', 'compatible', 'with', 'multifocal', 'areas', 'of', 'ischaemia', ',', 'with', 'multisegmental', 'distribution', ',', 'probably', 'of', 'embolic', 'aetiology', '.', 'No', 'aortic', 'dilatation', 'or', 'renal', 'artery', 'aneurysm', '.', 'No', 'intra', '-', 'peritoneal', 'alterations', '.', 'Taking', 'into', 'account', 'the', 'multi', '-', 'segmental', 'distribution', 'of', 'the', 'ischaemic', 'process', 'and', 'the', 'duration', 'of', 'discomfort', ',', 'we', 'decided', 'that', 'there', 'was', 'no', 'indication', 'for', 'invasive', 'manoeuvres', '.', 'The', 'patient', 'underwent', 'systemic', 'hypo', '-', 'coagulation', 'with', 'heparin', 'in', 'an', 'attempt', 'to', 'avoid', 'future', 'embolic', 'episodes', 'and', 'appropriate', 'analgesia', '.', '\\n\\n', 'He', 'then', 'underwent', 'multiple', 'examinations', 'in', 'an', 'attempt', 'to', 'identify', 'an', 'embolic', 'focus', 'and', 'the', 'aetiological', 'process', '.', 'Echocardiography', 'ruled', 'out', 'pathology', 'of', 'the', 'cardiac', 'valvular', 'apparatus', 'or', 'the', 'existence', 'of', 'valvular', 'vegetations', '.', 'Absence', 'of', 'areas', 'of', 'myocardial', 'dyskinesia', '.', 'Arteriography', 'showed', 'a', 'perfusion', 'deficit', 'of', 'the', 'lower', 'pole', 'of', 'the', 'left', 'kidney', 'with', 'multiple', 'other', 'less', 'prominent', 'areas', 'showing', 'perfusion', 'deficits', 'as', 'well', '.', 'No', 'other', 'alterations', 'such', 'as', 'macro', '/', 'microaneurysms', 'or', 'alterations', 'of', 'the', 'main', 'renal', 'artery', 'or', 'aorta', 'were', 'detected', '.', '\\n', 'The', 'study', 'to', 'rule', 'out', 'prothrombotic', 'and', 'vascular', 'disease', '(', 'lupus', 'anticoagulant', ',', 'anti', '-', 'cardiolipin', ',', 'ANCA', \"'s\", ',', 'detection', 'of', 'cryoglobulins', ',', 'ANA', \"'s\", ',', 'determination', 'of', 'immunoglobulins', 'and', 'complement', 'fractions', ')', 'was', 'negative', '.', '\\n', 'After', '15', 'months', 'of', 'follow', '-', 'up', ',', 'we', 'were', 'left', 'without', 'an', 'aetiological', 'diagnosis', '.', 'The', 'patient', 'remains', 'asymptomatic', ',', 'with', 'no', 'new', 'episodes', 'of', 'embolism', 'or', 'manifestations', 'of', 'systemic', 'disease', '.', 'Anti', '-', 'coagulation', 'therapy', 'has', 'been', 'discontinued', '.', 'He', 'maintains', 'controlled', 'hypertension', 'with', 'the', 'same', 'dose', 'of', 'verapamil', '.', 'The', 'last', 'analytical', 'control', 'had', 'a', 'serum', 'creatinine', 'of', '1.2', 'mg', '/', 'dL', ',', 'and', 'GFR', 'of', '93', 'ml', '/', 'min', '.', 'The', 'follow', '-', 'up', 'kinillogram', 'shows', 'a', 'functional', 'deficit', 'of', 'the', 'affected', 'renal', 'unit', '(', 'differential', 'function', '41', '%', ')', '.', '\\n\\n']\n",
"[['transient', 'ischaemic', 'attack'], ['cardiac', 'arrhythmia'], ['valvular', 'heart', 'disease'], ['thromboembolic', 'episodes'], ['urinary', 'lithiasis'], ['cocaine', 'abuse'], ['hepatitis', 'B', 'or', 'C'], ['Hypertension'], ['acute', 'myocardial', 'ischaemia'], ['aortic', 'dilatation'], ['renal', 'artery', 'aneurysm'], ['ischaemia'], ['ischaemia'], ['valvular', 'vegetations'], ['myocardial', 'dyskinesia'], ['macro', '/', 'microaneurysms', 'or', 'alterations', 'of', 'the', 'main', 'renal', 'artery', 'or', 'aorta'], ['prothrombotic', 'and', 'vascular', 'disease'], ['hypertension'], ['embolism'], ['systemic', 'disease']]\n",
"['hepatitis', 'B', 'or', 'C']\n",
"C\n",
"27\n",
"['A', 'four', '-', 'month', '-', 'old', 'boy', 'was', 'admitted', 'with', 'vomiting', ',', 'diarrhoea', 'and', 'severe', 'weight', 'and', 'body', 'weight', 'delay', '.', 'He', 'presented', 'hypotonia', ',', 'bilateral', 'convergent', 'strabismus', 'and', 'inverted', 'nipples', '.', 'Healthy', ',', 'non', '-', 'consanguineous', 'parents', ',', 'normal', 'pregnancy', 'and', 'delivery', '.', 'Since', 'the', 'age', 'of', 'one', 'month', 'she', 'has', 'had', 'poor', 'intake', ',', 'growth', 'failure', 'and', 'vomiting', ',', 'with', 'normal', 'abdominal', 'and', 'transfontanelar', 'ultrasound', ',', 'chest', 'X', '-', 'ray', ',', 'blood', 'tests', 'and', 'urine', 'culture', '.', '\\n\\n', 'On', 'admission', ',', 'rotavirus', 'was', 'detected', 'in', 'stool', '.', 'After', 'resolution', 'of', 'the', 'acute', 'process', ',', 'nutritional', 'support', 'was', 'started', 'with', 'artificial', 'protein', 'hydrolysate', 'formula', ';', 'later', ',', 'the', 'patient', 'was', 'switched', 'to', 'elemental', 'formula', 'as', 'he', 'continued', 'to', 'show', 'poor', 'weight', 'gain', '.', 'The', 'study', 'performed', 'detected', 'hypertransaminemia', ',', 'hypoalbuminemia', ',', 'decreased', 'transferrin', 'and', 'ceruloplasmin', ',', 'with', 'normal', 'hepatotropic', 'virus', 'serology', 'and', 'echocardiography', '.', 'Given', 'the', 'suspicion', 'of', 'a', 'possible', 'inborn', 'error', 'of', 'metabolism', '(', 'IEM', ')', ',', 'a', 'metabolic', 'study', 'was', 'requested', ',', 'while', 'the', 'patient', 'required', 'admission', 'to', 'the', 'Intensive', 'Care', 'Unit', 'due', 'to', 'the', 'onset', 'of', 'symptoms', 'compatible', 'with', 'sepsis', '(', 'fever', ',', 'hypoglycaemia', ',', 'poor', 'general', 'condition', 'and', 'poor', 'colouring', ')', '.', 'A', 'cranial', 'MRI', 'scan', 'was', 'performed', ',', 'which', 'showed', 'cerebellar', 'hypoplasia', '.', 'The', 'result', 'of', 'the', 'metabolic', 'study', 'confirms', 'a', 'congenital', 'protein', 'glycosylation', 'defect', '(', 'CGD', ')', 'type', 'Ia.', '\\n\\n\\n']\n",
"[['bilateral', 'convergent', 'strabismus'], ['inverted', 'nipples'], ['inborn', 'error', 'of', 'metabolism'], ['IEM'], ['sepsis'], ['cerebellar', 'hypoplasia'], ['congenital', 'protein', 'glycosylation', 'defect', '(', 'CGD', ')', 'type', 'Ia'], ['CGD'], ['hypoglycaemia']]\n",
"['congenital', 'protein', 'glycosylation', 'defect', '(', 'CGD', ')', 'type', 'Ia']\n",
"Ia\n",
"205\n",
"['Patient', 'aged', '53', 'years', 'at', 'the', 'time', 'of', 'diagnosis', 'with', 'a', 'personal', 'history', 'of', 'a', 'caesarean', 'section', ',', 'arterial', 'hypertension', 'and', 'tachycardia', 'treated', 'with', 'Atenolol', ',', 'who', 'came', 'to', 'the', 'Emergency', 'Department', 'with', 'a', '5', '-', 'month', 'history', 'of', 'progressive', 'dull', 'pain', 'in', 'the', 'left', 'flank', 'and', 'microhaematuria', '.', 'Ultrasound', 'and', 'then', 'abdominal', '-', 'pelvic', 'computerised', 'axial', 'tomography', '(', 'CAT', ')', 'scans', 'revealed', 'a', 'large', 'solid', 'mass', 'measuring', '20x16x13', 'cm', 'arising', 'from', 'the', 'left', 'renal', 'pole', ',', 'without', 'associated', 'venous', 'thrombosis', '.', 'Para', '-', 'aortic', 'adenopathies', 'were', 'also', 'observed', '.', 'All', 'this', 'was', 'compatible', 'with', 'a', 'left', 'renal', 'tumour', '.', '\\n\\n', 'In', 'view', 'of', 'this', 'finding', ',', 'an', 'extension', 'study', 'was', 'carried', 'out', 'with', 'a', 'general', 'analysis', 'showing', 'mild', 'iron', '-', 'deficiency', 'microcytic', 'anaemia', 'and', 'hyperuricaemia', ',', 'a', 'normal', 'chest', 'X', '-', 'ray', ',', 'a', 'bone', 'scan', 'showing', 'an', 'image', 'with', 'a', 'slight', 'increase', 'in', 'tracer', 'uptake', 'corresponding', 'to', 'soft', 'tissue', ',', 'with', 'a', 'rounded', 'morphology', ',', 'located', 'in', 'the', 'hypochondrium', 'and', 'in', 'the', 'hypochondrium', ',', 'located', 'in', 'the', 'left', 'hypochondrium', 'and', 'void', ',', 'exceeding', 'the', 'midline', ',', 'and', '3', 'foci', 'of', 'tracer', 'uptake', ',', 'one', 'in', 'the', 'left', 'iliac', 'blade', ',', 'the', 'second', 'in', 'the', 'left', 'hemivertebrae', 'L4', 'and', 'L5', 'and', 'the', 'third', 'at', 'the', 'level', 'of', 'the', 'right', 'hemivertebra', 'L2', ',', 'which', 'could', 'correspond', 'to', 'bone', 'involvement', 'due', 'to', 'contiguity', 'or', 'be', 'metastatic', ',', 'and', 'an', 'abdominal', 'MRI', 'scan', 'which', 'confirms', 'the', 'findings', 'of', 'the', 'CT', 'scan', '.', '\\n', 'With', 'the', 'presumptive', 'diagnosis', 'of', 'renal', 'cell', 'carcinoma', 'with', 'retroperitoneal', 'adenopathy', ',', 'the', 'patient', 'underwent', 'radical', 'left', 'nephrectomy', ',', 'removal', 'of', 'all', 'the', 'pararenal', 'fat', 'and', 'the', 'left', 'adrenal', 'gland', 'and', 'para', '-', 'aortic', 'and', 'interaortocaval', 'lymphadenectomy', 'from', 'the', 'renal', 'artery', 'to', '3', 'cm', 'from', 'the', 'common', 'iliac', 'artery', '.', 'Removal', 'of', 'the', 'mass', 'was', 'difficult', 'due', 'to', 'infiltration', 'of', 'the', 'transverse', 'mesocolon', 'and', 'tail', 'of', 'the', 'pancreas', ',', 'which', 'were', 'released', ',', 'leaving', 'the', 'mesocolon', 'untouched', '.', 'The', 'anatomo', '-', 'pathological', '(', 'A.P.', ')', 'result', 'was', 'as', 'follows', ':', 'Collision', 'renal', 'tumour', '(', 'Leiomyosarcoma', '(', '21', 'x', '15', 'cm', ')', 'and', 'renal', 'carcinoma', 'papillary', 'type', 'nuclear', 'grade', '3', '(', '7', 'x', '3.5', 'cm', ')', ')', '.', 'The', 'weight', 'of', 'the', 'whole', 'specimen', 'was', '2539', 'grams', '.', 'The', 'tumour', 'was', 'in', 'contact', 'with', 'the', 'surgical', 'edge', 'in', 'most', 'areas', '.', 'The', 'renal', 'parenchyma', 'was', 'microscopically', 'respected', 'and', 'no', 'tumour', 'infiltration', 'was', 'observed', '.', 'The', 'ureteral', 'fragment', 'and', 'renal', 'hilum', 'were', 'free', 'of', 'tumour', 'infiltration', '.', 'The', 'immunophenotypic', 'profile', 'of', 'the', 'tumour', 'was', 'as', 'follows', ':', 'Actin', ',', 'Desmin', ',', 'S-100', ',', 'Synaptofusin', 'and', 'CD', '56', 'and', 'c', '-', 'kit', 'negative', ';', 'Smooth', 'muscle', 'actin', 'positive', 'in', 'the', 'sarcomatous', 'zone', 'and', 'keratin', 'cocktail', '(', 'E1', ',', 'E3', ')', 'positive', 'in', 'the', 'carcinomatous', 'zone', '.', 'At', 'the', 'level', 'of', 'the', 'para', '-', 'aortic', 'chain', ',', '16', 'adenopathies', 'were', 'isolated', ',', 'the', 'largest', 'measuring', '2.5', 'cm', ',', 'with', 'metastases', 'in', '14', 'of', 'them', ',', '13', 'from', 'the', 'carcinoma', 'and', '1', 'with', 'mixed', 'metastases', '(', 'sarcoma+carcinoma', ')', '.', 'Six', 'adenopathies', 'were', 'isolated', 'in', 'the', 'interaortocaval', 'chain', ',', 'the', 'largest', 'measuring', '1.4', 'cm', ',', 'three', 'of', 'which', 'were', 'carcinoma', 'metastases', '.', 'The', 'mesocolic', 'bed', 'was', 'infiltrated', 'by', 'leiomyosarcoma', '.', 'In', 'the', 'perisuprarenal', 'adipose', 'tissue', '4', 'adenopathies', 'were', 'isolated', ',', '3', 'of', 'them', 'with', 'metastasis', 'of', 'the', 'carcinoma', 'and', 'another', 'with', 'mixed', 'metastasis', '(', 'carcinoma+sarcoma', ')', '.', 'The', 'left', 'adrenal', 'gland', ',', 'the', 'perirenal', 'fat', 'and', 'the', 'gall', 'bladder', 'showed', 'no', 'tumour', 'elements', '.', '\\n\\n', 'We', 'were', 'therefore', 'faced', 'with', 'a', 'renal', 'collision', 'tumour', 'consisting', 'of', 'a', 'stage', 'IV', 'papillary', 'type', 'renal', 'cancer', '(', 'pT3', '-', '4pN2', ')', 'according', 'to', 'the', 'TNM', 'classification', 'and', 'a', 'stage', 'IV', 'renal', 'leiomyosarcoma', '(', 'pT2bpN1', ')', 'according', 'to', 'the', 'AJCC', 'classification', ',', 'not', 'radically', 'resected', 'and', 'with', 'possible', 'bone', 'metastases', 'according', 'to', 'bone', 'scintigraphy', '.', '\\n', 'The', 'postoperative', 'period', 'was', 'uneventful', 'and', 'the', 'patient', 'was', 'referred', 'to', 'the', 'Medical', 'Oncology', 'Department', '.', 'It', 'was', 'decided', 'to', 'propose', 'complementary', 'chemotherapy', 'treatment', 'with', 'Ifosfamide', '5', 'g', '/', 'm2', 'in', 'a', 'continuous', 'infusion', 'of', '24h', 'x', '1', 'day', '+', 'Adriamycin', '60', 'mg', '/', 'm2', 'x', '1', 'day/21', 'days', 'against', 'the', 'sarcomatous', 'component', 'of', 'the', 'tumour', '.', 'A', 'CT', 'scan', 'was', 'previously', 'requested', 'in', 'which', 'a', 'small', 'soft', 'tissue', 'enlargement', 'was', 'observed', 'behind', 'the', 'pancreatic', 'tail', 'and', 'renal', 'bed', ',', 'which', 'could', 'be', 'compatible', 'with', 'present', 'disease', '.', '\\n', 'The', 'patient', 'started', 'treatment', 'according', 'to', 'the', 'planned', 'schedule', '3', 'weeks', 'after', 'surgery', '.', 'She', 'received', 'a', 'total', 'of', '6', 'cycles', 'with', 'good', 'clinical', 'tolerance', '.', 'After', 'the', '4th', 'cycle', ',', 'an', 'abdominal', 'CAT', 'scan', 'was', 'performed', ',', 'which', 'was', 'normal', ',', 'and', 'at', 'the', 'end', 'of', 'the', '6th', 'cycle', ',', 'a', 'bone', 'scan', 'was', 'performed', ',', 'which', 'showed', 'no', 'pathological', 'findings', '.', 'The', 'patient', 'underwent', 'regular', 'check', '-', 'ups', 'and', '3', 'months', 'later', 'a', 'chest', 'X', '-', 'ray', 'was', 'performed', 'showing', 'images', 'suggestive', 'of', 'bilateral', 'pulmonary', 'metastases', ',', 'which', 'were', 'confirmed', 'by', 'a', 'CT', 'scan', 'showing', 'multiple', 'bilateral', 'millimetric', 'pulmonary', 'metastases', '.', 'In', 'order', 'to', 'identify', 'the', 'origin', 'of', 'these', 'metastases', ',', 'Thoracic', 'Surgery', 'was', 'consulted', 'and', 'it', 'was', 'decided', 'to', 'perform', 'a', 'left', 'videothoracoscopy', 'with', 'biopsies', '.', 'The', 'P.A.', 'diagnosis', 'was', 'metastasis', 'of', 'poorly', 'differentiated', 'carcinoma', 'with', 'an', 'epithelial', 'component', '(', 'renal', ')', '.', 'In', 'view', 'of', 'this', 'diagnosis', ',', '6', 'months', 'after', 'finishing', 'the', 'first', 'chemotherapy', 'regimen', ',', 'it', 'was', 'decided', 'to', 'start', 'a', 'second', 'line', 'of', 'treatment', 'with', 'a', 'chemotherapy', 'regimen', 'with', 'Gemzitabine', 'and', 'Fluoropyrimidines9', 'that', 'had', 'proved', 'useful', 'in', 'stage', 'IV', 'renal', 'carcinoma', ':', 'Gemcitabine', '1000', 'mg', '/', 'm2', 'days', '1', 'and', '8', '+', 'Capecitabine', '1000', 'mg', '/', 'm2/12h', 'days', '1', '-', '14/21', 'days', ',', 'which', 'the', 'patient', 'accepted', '.', '\\n\\n', 'Treatment', 'was', 'started', 'with', 'a', '20', '%', 'dose', 'reduction', 'which', 'was', 'maintained', 'for', 'the', 'rest', 'of', 'the', 'treatment', 'given', 'the', 'patient', \"'s\", 'general', 'condition', '(', 'ECOG', ':', '1', '-', '2', ')', '.', 'After', 'the', 'second', 'cycle', ',', 'the', 'patient', 'suffered', 'a', 'complication', 'of', 'pulmonary', 'thromboembolism', 'from', 'which', 'she', 'recovered', 'but', 'which', 'caused', 'a', 'delay', 'of', '4', 'weeks', 'in', 'the', 'administration', 'of', 'the', 'third', 'cycle', '.', 'After', '6', 'cycles', 'of', 'treatment', ',', 'which', 'she', 'received', 'with', 'acceptable', 'tolerance', 'except', 'for', 'grade', '4', 'anaemia', ',', 'she', 'was', 're', '-', 'evaluated', 'with', 'a', 'body', 'CT', 'scan', 'which', 'showed', 'persistent', 'pulmonary', 'metastases', 'with', 'the', 'appearance', 'of', 'liver', 'and', 'spleen', 'metastases', 'and', 'local', 'relapse', '.', '\\n', 'In', 'view', 'of', 'this', 'progression', ',', 'treatment', 'with', 'IL-2', 'was', 'proposed', 'for', '6', 'weeks', '(', '1', 'week', 'of', 'induction', 'with', '18', 'Million', 'Units', '(', 'MU', ')', 'x', '5', 'days', 'and', '5', 'weeks', ':', '9', 'MU', 'days', '1', 'and', '2', 'and', '18', 'MU', 'days', '3', 'to', '5)10', '.', 'The', 'patient', 'accepted', 'the', 'treatment', 'with', 'moderate', 'toxicity', 'with', 'secondary', 'constitutional', 'symptoms', 'grade', '2', ',', 'anaemia', 'grade', '3', 'and', 'emesis', 'grade', '1', ',', 'maintaining', 'her', 'general', 'condition', '.', '\\n', 'At', 'the', 'end', 'of', 'treatment', ',', 'a', 'new', 're', '-', 'evaluation', 'was', 'performed', 'with', 'a', 'CT', 'scan', 'showing', 'progression', 'of', 'the', 'disease', 'with', 'a', 'large', 'mass', 'in', 'the', 'surgical', 'site', 'measuring', '19x10x5', 'cm', ',', 'which', 'had', 'grown', 'with', 'respect', 'to', 'the', 'previous', 'CT', 'scan', ',', 'and', 'persistent', 'metastases', 'in', 'the', 'rest', 'of', 'the', 'previous', 'sites', '.', 'The', 'patient', \"'s\", 'general', 'condition', 'worsened', ',', 'with', 'the', 'appearance', 'of', 'abdominal', 'and', 'lumbar', 'pain', ',', 'and', 'on', 'physical', 'examination', 'a', '5', 'cm', 'epigastric', 'mass', 'was', 'palpated', ',', 'corresponding', 'to', 'the', 'underlying', 'mass', '.', '\\n', 'Given', 'this', 'new', 'progression', ',', 'it', 'is', 'considered', 'that', 'the', 'tumour', 'is', 'resistant', 'to', 'chemotherapy', 'or', 'immunotherapy', 'schemes', 'against', 'renal', 'carcinoma', 'and', 'it', 'is', 'proposed', 'to', 'start', 'palliative', 'treatment', 'with', 'liposomal', 'Adriamycin', 'against', 'the', 'sarcomatous', 'component', 'of', 'the', 'tumour', '.', 'The', 'patient', 'accepted', 'the', 'proposal', 'and', 'received', 'a', 'first', 'cycle', '.', 'However', ',', 'a', 'week', 'later', 'she', 'went', 'to', 'the', 'emergency', 'department', 'for', 'hypovolemic', 'shock', 'with', 'metabolic', 'acidosis', 'and', 'pre', '-', 'renal', 'renal', 'failure', 'secondary', 'to', 'hyperemesis', 'of', '4', 'days', \"'\", 'evolution', 'and', 'grade', '4', 'anaemia', '.', 'The', 'patient', 'recovered', 'from', 'this', 'episode', 'but', 'a', 'week', 'later', 'she', 'began', 'with', 'faecal', 'vomiting', 'of', 'probable', 'obstructive', 'origin', 'due', 'to', 'compression', 'of', 'the', 'retroperitoneal', 'mass', ',', 'causing', 'progressive', 'deterioration', 'of', 'the', 'patient', 'and', 'the', 'patient', 'died', 'of', 'multi', '-', 'organ', 'failure', '19', 'months', 'after', 'diagnosis', '.', '\\n\\n\\n']\n",
"[['tumour'], ['arterial', 'hypertension'], ['venous', 'thrombosis'], ['Para', '-', 'aortic', 'adenopathies'], ['renal', 'tumour'], ['hyperuricaemia'], ['anaemia'], ['bone', 'involvement'], ['renal', 'cell', 'carcinoma', 'with', 'retroperitoneal', 'adenopathy'], ['tumour'], ['tumour'], ['tumour'], ['tumour'], ['tumour'], ['tumour'], ['renal', 'carcinoma'], ['metastases'], ['metastases'], ['metastases'], ['Leiomyosarcoma'], ['leiomyosarcoma'], ['renal', 'tumour'], ['tumour', 'infiltration'], ['tumour', 'infiltration'], ['sarcoma'], ['sarcoma'], ['carcinoma'], ['carcinoma'], ['carcinoma'], ['carcinoma'], ['carcinoma'], ['carcinoma'], ['carcinoma'], ['carcinoma', 'metastases'], ['renal', 'collision', 'tumour'], ['tumour'], ['papillary', 'type', 'renal', 'cancer'], ['renal', 'leiomyosarcoma'], ['metastases'], ['bone', 'metastases'], ['leiomyosarcoma'], ['tumour'], ['sarcomatous', 'component', 'of', 'the', 'tumour'], ['renal', 'carcinoma'], ['metastases'], ['metastases'], ['metastases'], ['bilateral', 'pulmonary', 'metastases'], ['carcinoma'], ['carcinoma'], ['pulmonary', 'metastases'], ['pulmonary', 'metastases'], ['pulmonary', 'thromboembolism'], ['metastases'], ['metastases'], ['anaemia'], ['pulmonary', 'metastases'], ['anaemia'], ['emesis'], ['metastases'], ['tumour'], ['tumour'], ['renal', 'carcinoma'], ['anaemia'], ['hypovolemic', 'shock'], ['metabolic', 'acidosis'], ['hyperemesis'], ['multi', '-', 'organ', 'failure'], ['carcinoma'], ['sarcomatous', 'component', 'of', 'the', 'tumour']]\n",
"['sarcoma']\n",
"sarcoma\n",
"231\n",
"['Patient', 'aged', '53', 'years', 'at', 'the', 'time', 'of', 'diagnosis', 'with', 'a', 'personal', 'history', 'of', 'a', 'caesarean', 'section', ',', 'arterial', 'hypertension', 'and', 'tachycardia', 'treated', 'with', 'Atenolol', ',', 'who', 'came', 'to', 'the', 'Emergency', 'Department', 'with', 'a', '5', '-', 'month', 'history', 'of', 'progressive', 'dull', 'pain', 'in', 'the', 'left', 'flank', 'and', 'microhaematuria', '.', 'Ultrasound', 'and', 'then', 'abdominal', '-', 'pelvic', 'computerised', 'axial', 'tomography', '(', 'CAT', ')', 'scans', 'revealed', 'a', 'large', 'solid', 'mass', 'measuring', '20x16x13', 'cm', 'arising', 'from', 'the', 'left', 'renal', 'pole', ',', 'without', 'associated', 'venous', 'thrombosis', '.', 'Para', '-', 'aortic', 'adenopathies', 'were', 'also', 'observed', '.', 'All', 'this', 'was', 'compatible', 'with', 'a', 'left', 'renal', 'tumour', '.', '\\n\\n', 'In', 'view', 'of', 'this', 'finding', ',', 'an', 'extension', 'study', 'was', 'carried', 'out', 'with', 'a', 'general', 'analysis', 'showing', 'mild', 'iron', '-', 'deficiency', 'microcytic', 'anaemia', 'and', 'hyperuricaemia', ',', 'a', 'normal', 'chest', 'X', '-', 'ray', ',', 'a', 'bone', 'scan', 'showing', 'an', 'image', 'with', 'a', 'slight', 'increase', 'in', 'tracer', 'uptake', 'corresponding', 'to', 'soft', 'tissue', ',', 'with', 'a', 'rounded', 'morphology', ',', 'located', 'in', 'the', 'hypochondrium', 'and', 'in', 'the', 'hypochondrium', ',', 'located', 'in', 'the', 'left', 'hypochondrium', 'and', 'void', ',', 'exceeding', 'the', 'midline', ',', 'and', '3', 'foci', 'of', 'tracer', 'uptake', ',', 'one', 'in', 'the', 'left', 'iliac', 'blade', ',', 'the', 'second', 'in', 'the', 'left', 'hemivertebrae', 'L4', 'and', 'L5', 'and', 'the', 'third', 'at', 'the', 'level', 'of', 'the', 'right', 'hemivertebra', 'L2', ',', 'which', 'could', 'correspond', 'to', 'bone', 'involvement', 'due', 'to', 'contiguity', 'or', 'be', 'metastatic', ',', 'and', 'an', 'abdominal', 'MRI', 'scan', 'which', 'confirms', 'the', 'findings', 'of', 'the', 'CT', 'scan', '.', '\\n', 'With', 'the', 'presumptive', 'diagnosis', 'of', 'renal', 'cell', 'carcinoma', 'with', 'retroperitoneal', 'adenopathy', ',', 'the', 'patient', 'underwent', 'radical', 'left', 'nephrectomy', ',', 'removal', 'of', 'all', 'the', 'pararenal', 'fat', 'and', 'the', 'left', 'adrenal', 'gland', 'and', 'para', '-', 'aortic', 'and', 'interaortocaval', 'lymphadenectomy', 'from', 'the', 'renal', 'artery', 'to', '3', 'cm', 'from', 'the', 'common', 'iliac', 'artery', '.', 'Removal', 'of', 'the', 'mass', 'was', 'difficult', 'due', 'to', 'infiltration', 'of', 'the', 'transverse', 'mesocolon', 'and', 'tail', 'of', 'the', 'pancreas', ',', 'which', 'were', 'released', ',', 'leaving', 'the', 'mesocolon', 'untouched', '.', 'The', 'anatomo', '-', 'pathological', '(', 'A.P.', ')', 'result', 'was', 'as', 'follows', ':', 'Collision', 'renal', 'tumour', '(', 'Leiomyosarcoma', '(', '21', 'x', '15', 'cm', ')', 'and', 'renal', 'carcinoma', 'papillary', 'type', 'nuclear', 'grade', '3', '(', '7', 'x', '3.5', 'cm', ')', ')', '.', 'The', 'weight', 'of', 'the', 'whole', 'specimen', 'was', '2539', 'grams', '.', 'The', 'tumour', 'was', 'in', 'contact', 'with', 'the', 'surgical', 'edge', 'in', 'most', 'areas', '.', 'The', 'renal', 'parenchyma', 'was', 'microscopically', 'respected', 'and', 'no', 'tumour', 'infiltration', 'was', 'observed', '.', 'The', 'ureteral', 'fragment', 'and', 'renal', 'hilum', 'were', 'free', 'of', 'tumour', 'infiltration', '.', 'The', 'immunophenotypic', 'profile', 'of', 'the', 'tumour', 'was', 'as', 'follows', ':', 'Actin', ',', 'Desmin', ',', 'S-100', ',', 'Synaptofusin', 'and', 'CD', '56', 'and', 'c', '-', 'kit', 'negative', ';', 'Smooth', 'muscle', 'actin', 'positive', 'in', 'the', 'sarcomatous', 'zone', 'and', 'keratin', 'cocktail', '(', 'E1', ',', 'E3', ')', 'positive', 'in', 'the', 'carcinomatous', 'zone', '.', 'At', 'the', 'level', 'of', 'the', 'para', '-', 'aortic', 'chain', ',', '16', 'adenopathies', 'were', 'isolated', ',', 'the', 'largest', 'measuring', '2.5', 'cm', ',', 'with', 'metastases', 'in', '14', 'of', 'them', ',', '13', 'from', 'the', 'carcinoma', 'and', '1', 'with', 'mixed', 'metastases', '(', 'sarcoma+carcinoma', ')', '.', 'Six', 'adenopathies', 'were', 'isolated', 'in', 'the', 'interaortocaval', 'chain', ',', 'the', 'largest', 'measuring', '1.4', 'cm', ',', 'three', 'of', 'which', 'were', 'carcinoma', 'metastases', '.', 'The', 'mesocolic', 'bed', 'was', 'infiltrated', 'by', 'leiomyosarcoma', '.', 'In', 'the', 'perisuprarenal', 'adipose', 'tissue', '4', 'adenopathies', 'were', 'isolated', ',', '3', 'of', 'them', 'with', 'metastasis', 'of', 'the', 'carcinoma', 'and', 'another', 'with', 'mixed', 'metastasis', '(', 'carcinoma+sarcoma', ')', '.', 'The', 'left', 'adrenal', 'gland', ',', 'the', 'perirenal', 'fat', 'and', 'the', 'gall', 'bladder', 'showed', 'no', 'tumour', 'elements', '.', '\\n\\n', 'We', 'were', 'therefore', 'faced', 'with', 'a', 'renal', 'collision', 'tumour', 'consisting', 'of', 'a', 'stage', 'IV', 'papillary', 'type', 'renal', 'cancer', '(', 'pT3', '-', '4pN2', ')', 'according', 'to', 'the', 'TNM', 'classification', 'and', 'a', 'stage', 'IV', 'renal', 'leiomyosarcoma', '(', 'pT2bpN1', ')', 'according', 'to', 'the', 'AJCC', 'classification', ',', 'not', 'radically', 'resected', 'and', 'with', 'possible', 'bone', 'metastases', 'according', 'to', 'bone', 'scintigraphy', '.', '\\n', 'The', 'postoperative', 'period', 'was', 'uneventful', 'and', 'the', 'patient', 'was', 'referred', 'to', 'the', 'Medical', 'Oncology', 'Department', '.', 'It', 'was', 'decided', 'to', 'propose', 'complementary', 'chemotherapy', 'treatment', 'with', 'Ifosfamide', '5', 'g', '/', 'm2', 'in', 'a', 'continuous', 'infusion', 'of', '24h', 'x', '1', 'day', '+', 'Adriamycin', '60', 'mg', '/', 'm2', 'x', '1', 'day/21', 'days', 'against', 'the', 'sarcomatous', 'component', 'of', 'the', 'tumour', '.', 'A', 'CT', 'scan', 'was', 'previously', 'requested', 'in', 'which', 'a', 'small', 'soft', 'tissue', 'enlargement', 'was', 'observed', 'behind', 'the', 'pancreatic', 'tail', 'and', 'renal', 'bed', ',', 'which', 'could', 'be', 'compatible', 'with', 'present', 'disease', '.', '\\n', 'The', 'patient', 'started', 'treatment', 'according', 'to', 'the', 'planned', 'schedule', '3', 'weeks', 'after', 'surgery', '.', 'She', 'received', 'a', 'total', 'of', '6', 'cycles', 'with', 'good', 'clinical', 'tolerance', '.', 'After', 'the', '4th', 'cycle', ',', 'an', 'abdominal', 'CAT', 'scan', 'was', 'performed', ',', 'which', 'was', 'normal', ',', 'and', 'at', 'the', 'end', 'of', 'the', '6th', 'cycle', ',', 'a', 'bone', 'scan', 'was', 'performed', ',', 'which', 'showed', 'no', 'pathological', 'findings', '.', 'The', 'patient', 'underwent', 'regular', 'check', '-', 'ups', 'and', '3', 'months', 'later', 'a', 'chest', 'X', '-', 'ray', 'was', 'performed', 'showing', 'images', 'suggestive', 'of', 'bilateral', 'pulmonary', 'metastases', ',', 'which', 'were', 'confirmed', 'by', 'a', 'CT', 'scan', 'showing', 'multiple', 'bilateral', 'millimetric', 'pulmonary', 'metastases', '.', 'In', 'order', 'to', 'identify', 'the', 'origin', 'of', 'these', 'metastases', ',', 'Thoracic', 'Surgery', 'was', 'consulted', 'and', 'it', 'was', 'decided', 'to', 'perform', 'a', 'left', 'videothoracoscopy', 'with', 'biopsies', '.', 'The', 'P.A.', 'diagnosis', 'was', 'metastasis', 'of', 'poorly', 'differentiated', 'carcinoma', 'with', 'an', 'epithelial', 'component', '(', 'renal', ')', '.', 'In', 'view', 'of', 'this', 'diagnosis', ',', '6', 'months', 'after', 'finishing', 'the', 'first', 'chemotherapy', 'regimen', ',', 'it', 'was', 'decided', 'to', 'start', 'a', 'second', 'line', 'of', 'treatment', 'with', 'a', 'chemotherapy', 'regimen', 'with', 'Gemzitabine', 'and', 'Fluoropyrimidines9', 'that', 'had', 'proved', 'useful', 'in', 'stage', 'IV', 'renal', 'carcinoma', ':', 'Gemcitabine', '1000', 'mg', '/', 'm2', 'days', '1', 'and', '8', '+', 'Capecitabine', '1000', 'mg', '/', 'm2/12h', 'days', '1', '-', '14/21', 'days', ',', 'which', 'the', 'patient', 'accepted', '.', '\\n\\n', 'Treatment', 'was', 'started', 'with', 'a', '20', '%', 'dose', 'reduction', 'which', 'was', 'maintained', 'for', 'the', 'rest', 'of', 'the', 'treatment', 'given', 'the', 'patient', \"'s\", 'general', 'condition', '(', 'ECOG', ':', '1', '-', '2', ')', '.', 'After', 'the', 'second', 'cycle', ',', 'the', 'patient', 'suffered', 'a', 'complication', 'of', 'pulmonary', 'thromboembolism', 'from', 'which', 'she', 'recovered', 'but', 'which', 'caused', 'a', 'delay', 'of', '4', 'weeks', 'in', 'the', 'administration', 'of', 'the', 'third', 'cycle', '.', 'After', '6', 'cycles', 'of', 'treatment', ',', 'which', 'she', 'received', 'with', 'acceptable', 'tolerance', 'except', 'for', 'grade', '4', 'anaemia', ',', 'she', 'was', 're', '-', 'evaluated', 'with', 'a', 'body', 'CT', 'scan', 'which', 'showed', 'persistent', 'pulmonary', 'metastases', 'with', 'the', 'appearance', 'of', 'liver', 'and', 'spleen', 'metastases', 'and', 'local', 'relapse', '.', '\\n', 'In', 'view', 'of', 'this', 'progression', ',', 'treatment', 'with', 'IL-2', 'was', 'proposed', 'for', '6', 'weeks', '(', '1', 'week', 'of', 'induction', 'with', '18', 'Million', 'Units', '(', 'MU', ')', 'x', '5', 'days', 'and', '5', 'weeks', ':', '9', 'MU', 'days', '1', 'and', '2', 'and', '18', 'MU', 'days', '3', 'to', '5)10', '.', 'The', 'patient', 'accepted', 'the', 'treatment', 'with', 'moderate', 'toxicity', 'with', 'secondary', 'constitutional', 'symptoms', 'grade', '2', ',', 'anaemia', 'grade', '3', 'and', 'emesis', 'grade', '1', ',', 'maintaining', 'her', 'general', 'condition', '.', '\\n', 'At', 'the', 'end', 'of', 'treatment', ',', 'a', 'new', 're', '-', 'evaluation', 'was', 'performed', 'with', 'a', 'CT', 'scan', 'showing', 'progression', 'of', 'the', 'disease', 'with', 'a', 'large', 'mass', 'in', 'the', 'surgical', 'site', 'measuring', '19x10x5', 'cm', ',', 'which', 'had', 'grown', 'with', 'respect', 'to', 'the', 'previous', 'CT', 'scan', ',', 'and', 'persistent', 'metastases', 'in', 'the', 'rest', 'of', 'the', 'previous', 'sites', '.', 'The', 'patient', \"'s\", 'general', 'condition', 'worsened', ',', 'with', 'the', 'appearance', 'of', 'abdominal', 'and', 'lumbar', 'pain', ',', 'and', 'on', 'physical', 'examination', 'a', '5', 'cm', 'epigastric', 'mass', 'was', 'palpated', ',', 'corresponding', 'to', 'the', 'underlying', 'mass', '.', '\\n', 'Given', 'this', 'new', 'progression', ',', 'it', 'is', 'considered', 'that', 'the', 'tumour', 'is', 'resistant', 'to', 'chemotherapy', 'or', 'immunotherapy', 'schemes', 'against', 'renal', 'carcinoma', 'and', 'it', 'is', 'proposed', 'to', 'start', 'palliative', 'treatment', 'with', 'liposomal', 'Adriamycin', 'against', 'the', 'sarcomatous', 'component', 'of', 'the', 'tumour', '.', 'The', 'patient', 'accepted', 'the', 'proposal', 'and', 'received', 'a', 'first', 'cycle', '.', 'However', ',', 'a', 'week', 'later', 'she', 'went', 'to', 'the', 'emergency', 'department', 'for', 'hypovolemic', 'shock', 'with', 'metabolic', 'acidosis', 'and', 'pre', '-', 'renal', 'renal', 'failure', 'secondary', 'to', 'hyperemesis', 'of', '4', 'days', \"'\", 'evolution', 'and', 'grade', '4', 'anaemia', '.', 'The', 'patient', 'recovered', 'from', 'this', 'episode', 'but', 'a', 'week', 'later', 'she', 'began', 'with', 'faecal', 'vomiting', 'of', 'probable', 'obstructive', 'origin', 'due', 'to', 'compression', 'of', 'the', 'retroperitoneal', 'mass', ',', 'causing', 'progressive', 'deterioration', 'of', 'the', 'patient', 'and', 'the', 'patient', 'died', 'of', 'multi', '-', 'organ', 'failure', '19', 'months', 'after', 'diagnosis', '.', '\\n\\n\\n']\n",
"[['tumour'], ['arterial', 'hypertension'], ['venous', 'thrombosis'], ['Para', '-', 'aortic', 'adenopathies'], ['renal', 'tumour'], ['hyperuricaemia'], ['anaemia'], ['bone', 'involvement'], ['renal', 'cell', 'carcinoma', 'with', 'retroperitoneal', 'adenopathy'], ['tumour'], ['tumour'], ['tumour'], ['tumour'], ['tumour'], ['tumour'], ['renal', 'carcinoma'], ['metastases'], ['metastases'], ['metastases'], ['Leiomyosarcoma'], ['leiomyosarcoma'], ['renal', 'tumour'], ['tumour', 'infiltration'], ['tumour', 'infiltration'], ['sarcoma'], ['sarcoma'], ['carcinoma'], ['carcinoma'], ['carcinoma'], ['carcinoma'], ['carcinoma'], ['carcinoma'], ['carcinoma'], ['carcinoma', 'metastases'], ['renal', 'collision', 'tumour'], ['tumour'], ['papillary', 'type', 'renal', 'cancer'], ['renal', 'leiomyosarcoma'], ['metastases'], ['bone', 'metastases'], ['leiomyosarcoma'], ['tumour'], ['sarcomatous', 'component', 'of', 'the', 'tumour'], ['renal', 'carcinoma'], ['metastases'], ['metastases'], ['metastases'], ['bilateral', 'pulmonary', 'metastases'], ['carcinoma'], ['carcinoma'], ['pulmonary', 'metastases'], ['pulmonary', 'metastases'], ['pulmonary', 'thromboembolism'], ['metastases'], ['metastases'], ['anaemia'], ['pulmonary', 'metastases'], ['anaemia'], ['emesis'], ['metastases'], ['tumour'], ['tumour'], ['renal', 'carcinoma'], ['anaemia'], ['hypovolemic', 'shock'], ['metabolic', 'acidosis'], ['hyperemesis'], ['multi', '-', 'organ', 'failure'], ['carcinoma'], ['sarcomatous', 'component', 'of', 'the', 'tumour']]\n",
"['sarcoma']\n",
"sarcoma\n",
"231\n",
"['Male', 'patient', ',', 'black', ',', '21', 'years', 'of', 'age', ',', 'who', 'consulted', 'the', 'Maxillofacial', 'Surgery', 'Department', 'of', 'the', 'Hospital', 'Universitario', 'San', 'Vicente', 'Fundación', 'de', 'Medellín', 'attached', 'to', 'the', 'Universidad', 'de', 'Antioquia', ',', 'referred', 'by', 'a', 'dentist', 'from', 'a', 'public', 'health', 'institution', 'for', 'presenting', 'an', 'asymptomatic', 'increase', 'in', 'the', 'volume', 'of', 'the', 'mandible', ',', 'with', 'unknown', 'evolution', '.', '\\n', 'Intraorally', ',', 'excellent', 'dental', 'integrity', 'and', 'good', 'dental', 'occlusion', 'were', 'observed', ',', 'but', 'there', 'was', 'an', 'increase', 'in', 'volume', 'in', 'the', 'vestibular', 'region', 'involving', 'the', 'body', 'and', 'the', 'symphysis', ',', 'while', 'lingually', ',', 'the', 'expansion', 'of', 'the', 'table', 'was', 'only', 'in', 'the', 'area', 'of', 'the', 'lower', 'right', 'canine', 'and', 'bicuspids', '.', '\\n', 'The', 'initial', 'panoramic', 'X', '-', 'ray', 'showed', 'a', 'radiolucent', 'image', '10', 'cm', 'long', ',', 'multiloculated', ',', 'located', 'from', 'distal', 'tooth', '46', 'to', 'mesial', 'tooth', '33', '.', 'The', 'mesial', 'root', 'of', 'tooth', '46', 'and', 'the', 'root', 'of', 'tooth', '45', 'showed', 'rhizolysis', ',', 'with', 'possible', 'pulp', 'necrosis', '.', 'Teeth', '46', ',', '45', 'and', '44', 'had', 'minimal', 'mobility', ';', 'the', 'other', 'teeth', 'had', 'normal', 'vitality', 'and', 'the', 'lower', 'dental', 'canal', 'was', 'rejected', '.', 'There', 'is', 'no', 'alteration', 'in', 'the', 'sensitivity', 'of', 'the', 'mentonian', 'nerve', '.', '\\n\\n', 'The', 'patient', 'consented', 'to', 'the', 'entire', 'treatment', 'by', 'signing', 'the', 'informed', 'consent', 'form', '.', 'Prior', 'to', 'the', 'initial', 'biopsy', 'an', 'aspirate', 'was', 'taken', 'producing', 'a', 'citrine', 'fluid', 'and', 'the', 'first', 'histopathological', 'study', 'found', 'a', 'lax', 'connective', 'tissue', ',', 'some', 'multinucleated', 'giant', 'cells', 'with', 'few', 'nuclei', 'and', 'a', 'thin', 'band', 'of', 'keratin', ';', 'there', 'was', 'no', 'evidence', 'of', 'epithelial', 'tissue', 'but', 'it', 'was', 'considered', 'as', 'insufficient', 'sample', '.', 'Nevertheless', ',', 'a', 'diagnosis', 'of', 'keratocystic', 'odontogenic', 'tumour', 'was', 'made', ',', 'due', 'to', 'the', 'presence', 'of', 'a', 'keratin', 'band', '.', 'After', 'this', 'procedure', ',', 'the', 'lesion', 'became', 'superinfected', 'and', 'the', 'patient', 'had', 'to', 'be', 'hospitalised', 'due', 'to', 'the', 'severity', 'of', 'the', 'clinical', 'picture', ';', 'this', 'condition', 'was', 'used', 'to', 'perform', 'a', 'second', 'biopsy', 'under', 'general', 'anaesthesia', '8', 'days', 'later', ',', 'in', 'which', 'a', 'capsule', 'made', 'up', 'of', 'connective', 'tissue', 'with', 'an', 'intense', 'inflammatory', 'infiltrate', 'and', 'epithelial', 'tissue', 'with', 'several', 'layers', 'of', 'cells', 'was', 'found', ',', 'which', 'gave', 'the', 'diagnosis', ':', 'inflammatory', 'root', 'cyst', '.', 'Given', 'the', 'ambiguity', 'of', 'the', 'diagnosis', 'and', 'the', 'aggressiveness', 'of', 'the', 'lesion', ',', 'it', 'was', 'decided', 'to', 'perform', 'the', 'intervention', 'with', 'the', 'first', 'diagnosis', '(', 'keratocystic', 'odontogenic', 'tumour', '-exkeratocyst-', ')', '.', 'With', 'this', 'diagnosis', 'it', 'was', 'decided', 'to', 'plan', 'an', 'aggressive', 'surgical', 'treatment', 'and', 'it', 'was', 'decided', 'to', 'order', 'endodontic', 'treatment', 'from', '46', 'to', '33', '(', '9', 'teeth', ')', 'prior', 'to', 'the', 'surgical', 'procedure', ',', 'as', 'the', 'dental', 'apices', 'were', 'immersed', 'in', 'the', 'cavity', 'and', 'the', 'curettage', 'itself', 'would', 'cause', 'amputation', 'of', 'the', 'pulp', 'vascular', 'bundles', ',', 'in', 'addition', ',', 'according', 'to', 'the', 'diagnosis', ',', 'it', 'is', 'a', 'very', 'recurrent', 'lesion', '.', 'This', 'endodontic', 'treatment', 'lasted', '3', 'months', 'due', 'to', 'the', 'difficulty', 'of', 'sealing', 'the', 'canals', 'due', 'to', 'the', 'presence', 'of', 'an', 'amber', '-', 'coloured', 'liquid', 'draining', 'through', 'the', 'pulp', 'chambers', '.', 'After', 'this', 'time', ',', 'the', 'production', 'of', 'liquid', 'content', 'ceased', 'and', 'the', 'endodontic', 'treatment', 'was', 'completed', '.', '\\n\\n', 'The', 'surgical', 'phase', 'was', 'performed', 'under', 'general', 'anaesthesia', '.', 'A', 'trapezoidal', 'flap', 'was', 'raised', 'from', 'distal', '46', 'to', 'distal', '33', '.', 'When', 'the', 'flap', 'was', 'reflected', ',', 'it', 'was', 'found', 'to', 'be', 'expanded', 'in', 'its', 'entire', 'length', 'and', 'perforated', 'in', 'the', 'bicuspid', 'area', '(', 'teeth', '44', 'and', '45', ')', ',', 'where', 'the', 'biopsies', 'had', 'previously', 'been', 'taken', '.', 'We', 'proceeded', 'to', 'remove', 'all', 'the', 'expanded', 'vestibular', 'cortex', 'until', 'we', 'had', 'complete', 'access', 'to', 'the', 'cystic', 'cavity', '.', 'The', 'thick', 'fibrous', 'capsule', 'covering', 'the', 'osseous', 'defect', 'was', 'found', 'and', 'removed', '.', 'The', 'bone', 'cavity', 'is', 'reamed', 'with', 'rotary', 'cutting', 'instruments', 'and', 'the', 'entire', 'bone', 'defect', 'is', 'brushed', 'with', 'Carnoy', \"'s\", 'solution', '.', 'Some', 'perforations', 'of', 'the', 'lingual', 'cortex', 'are', 'visible', ',', 'which', 'are', 'cauterised', 'with', 'an', 'electroscalpel', 'due', 'to', 'the', 'risk', 'of', 'invasion', 'of', 'the', 'tumour', 'lesion', 'into', 'the', 'lingual', 'soft', 'tissues', '.', 'Due', 'to', 'the', 'weakening', 'of', 'the', 'mandibular', 'basilar', 'border', ',', 'a', 'reconstruction', 'plate', 'is', 'placed', 'to', 'avoid', 'intra-', 'or', 'postoperative', 'fracture', '.', 'Before', 'suturing', 'the', 'flap', ',', 'the', 'bone', 'defect', 'is', 'filled', 'with', 'fibrillar', 'collagen', 'and', 'medicated', 'with', 'analgesics', 'and', 'antibiotics', '.', 'The', 'tissue', 'obtained', 'is', 'sent', 'to', 'pathology', '.', 'The', 'histopathological', 'report', 'of', 'the', 'surgical', 'specimen', 'shows', 'fibroconnective', 'tissue', 'devoid', 'of', 'epithelium', 'and', 'a', 'post', '-', 'surgical', 'diagnosis', 'of', 'an', 'aneurysmal', 'bone', 'cyst', 'was', 'obtained', '.', '\\n', 'Because', 'the', 'second', 'biopsy', 'showed', 'an', 'epithelium', ',', 'the', 'diagnosis', 'was', 'misleading', 'but', ',', 'evaluating', 'the', 'three', 'histopathological', 'samples', ',', 'it', 'was', 'considered', 'that', 'the', 'epithelial', 'tissue', 'of', 'the', 'second', 'biopsy', 'was', 'the', 'product', 'of', 'the', 'inflammatory', 'reaction', ',', 'while', 'the', 'initial', 'biopsy', 'and', 'the', 'surgical', 'specimen', 'showed', 'no', 'epithelial', 'component', ';', 'it', 'was', 'decided', 'to', 'consider', 'it', 'an', 'aneurysmal', 'bone', 'cyst', '.', '\\n', 'The', 'patient', 'was', 'evaluated', 'at', '8', 'and', '15', 'days', 'and', 'then', 'at', '2', 'months', ',', '10', 'months', 'and', '2', 'years', '.', 'At', 'the', 'follow', '-', 'up', 'appointment', 'at', '2', 'months', 'pulp', 'necrosis', 'and', 'fistula', 'were', 'found', 'at', 'the', 'level', 'of', '47', ',', 'which', 'was', 'adjacent', 'to', 'the', 'lesion', 'and', 'the', 'mesial', 'root', 'apex', 'had', 'been', 'amputated', 'during', 'the', 'surgical', 'procedure', '.', 'Endodontics', 'was', 'performed', 'and', 'the', 'infection', 'resolved', '.', 'He', 'also', 'presented', 'with', 'right', 'mentonian', 'nerve', 'paraesthesia', 'and', 'was', 'prescribed', 'B', '-', 'complex', 'tablets', 'for', 'one', 'month', '.', 'A', 'new', 'post', '-', 'surgical', 'assessment', 'was', 'carried', 'out', '10', 'months', 'later', 'and', 'adequate', 'bone', 'healing', 'was', 'found', ',', 'but', 'there', 'was', 'an', 'occlusal', 'sequela', ',', 'as', 'the', 'teeth', 'that', 'were', 'left', 'without', 'bone', 'support', 'due', 'to', 'being', 'immersed', 'in', 'the', 'bone', 'defect', '(', 'teeth', '44', 'to', '33', ')', 'were', 'intruded', 'and', 'produced', 'an', 'open', 'bite', '.', 'Two', 'years', 'later', 'a', 'new', 'clinical', 'and', 'radiographic', 'check', '-', 'up', 'was', 'carried', 'out', 'where', 'it', 'was', 'found', 'that', ':', 'teeth', '44', 'to', '33', 'remained', 'in', 'open', 'bite', ',', 'tooth', '44', 'was', 'also', 'slightly', 'vestibularised', '.', 'The', 'panoramic', 'radiograph', 'shows', 'good', 'bone', 'filling', ',', 'but', 'the', 'three', '-', 'dimensional', 'tomography', 'shows', 'that', 'there', 'is', 'a', 'defect', 'of', 'about', '6', 'mm', 'in', 'diameter', 'that', 'compromises', 'the', 'apex', 'of', '44', 'and', '43', ',', 'and', 'that', 'the', 'bone', 'has', 'yet', 'to', 'regenerate', ',', 'and', 'the', 'paraesthesia', 'still', 'persists', 'two', 'years', 'later', '.', 'The', 'intraoral', 'image', 'shows', 'that', 'the', 'enlargement', 'produced', 'by', 'the', 'expansion', 'of', 'the', 'lesion', 'of', 'the', 'lingual', 'plate', 'at', 'the', 'level', 'of', 'teeth', '43', ',', '44', 'and', '45', 'still', 'persists', '.', '\\n\\n\\n']\n",
"[['pulp', 'necrosis'], ['keratocystic', 'odontogenic', 'tumour'], ['keratocystic', 'odontogenic', 'tumour'], ['inflammatory', 'root', 'cyst'], ['exkeratocyst'], ['lesion', 'became', 'superinfected'], ['lesion'], ['lesion'], ['lesion'], ['aneurysmal', 'bone', 'cyst'], ['tumour', 'lesion'], ['bone', 'defect'], ['bone', 'defect'], ['cystic', 'cavity'], ['lesion'], ['perforations', 'of', 'the', 'lingual', 'cortex'], ['aneurysmal', 'bone', 'cyst'], ['open', 'bite'], ['open', 'bite'], ['infection'], ['bone', 'defect'], ['pulp', 'necrosis'], ['lesion'], ['lesion']]\n",
"['exkeratocyst']\n",
"exkeratocyst\n",
"638\n"
]
}
],
"source": [
"labels_tokenized = []\n",
"idx =-1\n",
"for hct, et in zip(HCs_tokenized, Ent_tokenized):\n",
" idx+=1\n",
" labels = []\n",
" for i in range(len(hct)):\n",
" #Labels: 0->'O'; 1->'B'; 2->'I'\n",
" #labels.append('O')\n",
" labels.append(0)\n",
"\n",
" #For Entities (Diseases|Enfermedades)\n",
" for enf in et:\n",
" first = True\n",
" for e in enf:\n",
" if first == True:\n",
" try:\n",
" #labels[hct.index(e)] = 'B'\n",
" #labels[posLab] = 'B'\n",
" indices = find_idx(hct, e)\n",
" if len(indices) > 1:\n",
" for id in indices:\n",
" labels[id] = 1\n",
" else:\n",
" labels[hct.index(e)] = 1\n",
" \n",
" first = False\n",
" except:\n",
" first = False\n",
" if e == \"sarcoma+carcinoma\" or e == \"carcinoma+sarcoma\":\n",
" continue\n",
" print(hct)\n",
" print(et)\n",
" print(enf)\n",
" print(e)\n",
" print(idx)\n",
" else:\n",
" try:\n",
" #labels[hct.index(e)] = 'I'\n",
" #labels[posLab] = 'I'\n",
" indices = find_idx(hct, e)\n",
" if len(indices) > 1:\n",
" for id in indices:\n",
" if labels[id-1] != 0:\n",
" labels[id] = 2\n",
" else:\n",
" labels[hct.index(e)] = 2\n",
" except:\n",
" if e == \"sarcoma+carcinoma\" or e == \"carcinoma+sarcoma\":\n",
" continue\n",
" print(hct)\n",
" print(et)\n",
" print(enf)\n",
" print(e)\n",
" print(idx)\n",
"\n",
" labels_tokenized.append(labels)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "jXKczS_fKpLc",
"outputId": "d385f71c-5507-4a33-8cce-f5d2195354f9"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"A\t0\n",
"73\t0\n",
"-\t0\n",
"year\t0\n",
"-\t0\n",
"old\t0\n",
"patient\t0\n",
"with\t0\n",
"a\t0\n",
"history\t0\n",
"of\t0\n",
"arterial\t1\n",
"hypertension\t2\n",
"and\t0\n",
"polyarthrosis\t1\n",
"presented\t0\n",
"to\t0\n",
"the\t0\n",
"emergency\t0\n",
"department\t0\n",
"with\t0\n",
"abdominal\t0\n",
"distension\t0\n",
"and\t0\n",
"pain\t0\n",
"associated\t0\n",
"with\t0\n",
"constipation\t0\n",
"and\t0\n",
"febrile\t0\n",
"fever\t0\n",
".\t0\n",
"The\t0\n",
"symptoms\t0\n",
"had\t0\n",
"started\t0\n",
"three\t0\n",
"weeks\t0\n",
"earlier\t0\n",
"and\t0\n",
"worsened\t0\n",
"during\t0\n",
"the\t0\n",
"four\t0\n",
"days\t0\n",
"prior\t0\n",
"to\t0\n",
"admission\t0\n",
".\t0\n",
"During\t0\n",
"this\t0\n",
"period\t0\n",
",\t0\n",
"an\t0\n",
"upper\t0\n",
"gastrointestinal\t0\n",
"fibroendoscopy\t0\n",
"(\t0\n",
"oesophagus\t0\n",
",\t0\n",
"stomach\t0\n",
"and\t0\n",
"duodenum\t0\n",
")\t0\n",
"and\t0\n",
"a\t0\n",
"colonoscopy\t0\n",
"(\t0\n",
"up\t0\n",
"to\t0\n",
"the\t0\n",
"splenic\t0\n",
"angle\t0\n",
")\t0\n",
"were\t0\n",
"performed\t0\n",
",\t0\n",
"but\t0\n",
"no\t0\n",
"abnormalities\t0\n",
"were\t0\n",
"found\t0\n",
".\t0\n",
"\n",
"\t0\n",
"Physical\t0\n",
"examination\t0\n",
"revealed\t0\n",
"a\t0\n",
"low\t0\n",
"-\t0\n",
"grade\t0\n",
"fever\t0\n",
"(\t0\n",
"37.6º\t0\n",
"C\t0\n",
")\t0\n",
",\t0\n",
"a\t0\n",
"distended\t0\n",
"abdomen\t0\n",
",\t0\n",
"diffusely\t0\n",
"painful\t0\n",
"on\t0\n",
"palpation\t0\n",
",\t0\n",
"tympanised\t0\n",
"on\t0\n",
"percussion\t0\n",
",\t0\n",
"with\t0\n",
"scant\t0\n",
"borborygmi\t0\n",
"but\t0\n",
"no\t0\n",
"evidence\t0\n",
"of\t0\n",
"peritonism\t0\n",
",\t0\n",
"pulmonary\t0\n",
"auscultation\t0\n",
"with\t0\n",
"decreased\t0\n",
"ventilation\t0\n",
"in\t0\n",
"the\t0\n",
"lower\t1\n",
"half\t0\n",
"of\t0\n",
"the\t0\n",
"right\t0\n",
"hemithorax\t0\n",
"and\t0\n",
"the\t0\n",
"onset\t0\n",
"of\t0\n",
"intense\t0\n",
"pain\t0\n",
"on\t0\n",
"palpation\t0\n",
"and\t0\n",
"percussion\t0\n",
"of\t0\n",
"the\t0\n",
"last\t0\n",
"three\t0\n",
"dorsal\t0\n",
"spinous\t0\n",
"processes\t0\n",
".\t0\n",
"\n",
"\t0\n",
"Analyses\t0\n",
"showed\t0\n",
"8.2\t0\n",
"x\t0\n",
"109\t0\n",
"leukocytes\t0\n",
"/\t0\n",
"L\t0\n",
",\t0\n",
"haemoglobin\t0\n",
"136\t0\n",
"g\t0\n",
"/\t0\n",
"L\t0\n",
",\t0\n",
"platelets\t0\n",
"186\t0\n",
"x\t0\n",
"109\t0\n",
"/\t0\n",
"L.\t0\n",
"Except\t0\n",
"for\t0\n",
"glycaemia\t0\n",
"(\t0\n",
"123\t0\n",
"mg\t0\n",
"/\t0\n",
"dl\t0\n",
")\t0\n",
",\t0\n",
"the\t0\n",
"following\t0\n",
"laboratory\t0\n",
"parameters\t0\n",
"were\t0\n",
"normal\t0\n",
"or\t0\n",
"negative\t0\n",
":\t0\n",
"urea\t0\n",
",\t0\n",
"creatinine\t0\n",
",\t0\n",
"bilirubin\t0\n",
",\t0\n",
"transaminases\t0\n",
",\t0\n",
"gamma\t0\n",
"-\t0\n",
"glutamyltranspeptidase\t0\n",
",\t0\n",
"sodium\t0\n",
",\t0\n",
"potassium\t0\n",
",\t0\n",
"chlorine\t0\n",
",\t0\n",
"calcium\t0\n",
",\t0\n",
"phosphorus\t0\n",
",\t0\n",
"creatine\t0\n",
"phosphokinase\t0\n",
",\t0\n",
"amylase\t0\n",
",\t0\n",
"lactate\t0\n",
"dehydrogenase\t0\n",
"(\t0\n",
"LDH\t0\n",
")\t0\n",
",\t0\n",
"proteinogram\t0\n",
",\t0\n",
"immunoglobulin\t0\n",
"dosage\t0\n",
",\t0\n",
"alpha\t0\n",
"-\t0\n",
"fetoprotein\t0\n",
",\t0\n",
"CA\t0\n",
"19\t0\n",
"antigens\t0\n",
".\t0\n",
"9\t0\n",
"and\t0\n",
"CA\t0\n",
"125\t0\n",
"antigens\t0\n",
",\t0\n",
"as\t0\n",
"well\t0\n",
"as\t0\n",
"general\t0\n",
"urinalysis\t0\n",
".\t0\n",
"ESR\t0\n",
"and\t0\n",
"C\t0\n",
"-\t0\n",
"reactive\t0\n",
"protein\t0\n",
"were\t0\n",
"elevated\t0\n",
",\t0\n",
"with\t0\n",
"values\t0\n",
"of\t0\n",
"85\t0\n",
"mm\t0\n",
"/\t0\n",
"1\t0\n",
"h\t0\n",
"and\t0\n",
"133\t0\n",
"mg\t0\n",
"/\t0\n",
"L\t0\n",
"(\t0\n",
"normal\t0\n",
"<\t0\n",
"5\t0\n",
"mg\t0\n",
"/\t0\n",
"L\t0\n",
")\t0\n",
",\t0\n",
"respectively\t0\n",
".\t0\n",
"Mantoux\t0\n",
"intradermal\t0\n",
"reaction\t0\n",
"(\t0\n",
"10\t0\n",
"IU\t0\n",
"RT-23\t0\n",
")\t0\n",
"was\t0\n",
"positive\t0\n",
",\t0\n",
"with\t0\n",
"an\t0\n",
"induration\t0\n",
"of\t0\n",
"25\t0\n",
"mm\t0\n",
".\t0\n",
"Chest\t0\n",
"X\t0\n",
"-\t0\n",
"ray\t0\n",
"showed\t0\n",
"an\t0\n",
"image\t0\n",
"compatible\t0\n",
"with\t0\n",
"right\t0\n",
"lower\t1\n",
"lobe\t2\n",
"atelectasis\t2\n",
"in\t0\n",
"the\t0\n",
"context\t0\n",
"of\t0\n",
"an\t0\n",
"ipsilateral\t0\n",
"pleural\t1\n",
"effusion\t2\n",
".\t0\n",
"There\t0\n",
"were\t0\n",
"no\t0\n",
"signs\t0\n",
"suggestive\t0\n",
"of\t0\n",
"adenopathy\t0\n",
"or\t0\n",
"alterations\t0\n",
"in\t0\n",
"the\t0\n",
"cardiopericardial\t0\n",
"silhouette\t0\n",
".\t0\n",
"A\t0\n",
"thoracoabdominal\t0\n",
"CT\t0\n",
"scan\t0\n",
"confirmed\t0\n",
"the\t0\n",
"existence\t0\n",
"of\t0\n",
"a\t0\n",
"right\t0\n",
"pleural\t1\n",
"effusion\t2\n",
"and\t0\n",
"identified\t0\n",
"prominent\t0\n",
"degenerative\t0\n",
"changes\t0\n",
"along\t0\n",
"the\t0\n",
"dorsolumbar\t0\n",
"spine\t0\n",
"but\t0\n",
",\t0\n",
"above\t0\n",
"all\t0\n",
",\t0\n",
"erosions\t0\n",
"in\t0\n",
"the\t0\n",
"vertebral\t0\n",
"plates\t0\n",
"adjacent\t0\n",
"to\t0\n",
"the\t0\n",
"D10\t0\n",
"-\t0\n",
"D11\t0\n",
"disc\t0\n",
"space\t0\n",
".\t0\n",
"A\t0\n",
"lumbar\t0\n",
"MRI\t0\n",
"showed\t0\n",
"hyposignal\t0\n",
"on\t0\n",
"T1\t0\n",
"-\t0\n",
"weighted\t0\n",
"sequences\t0\n",
"and\t0\n",
"hypersignal\t0\n",
"on\t0\n",
"T2\t0\n",
"-\t0\n",
"weighted\t0\n",
"sequences\t0\n",
"in\t0\n",
"these\t0\n",
"vertebrae\t0\n",
"and\t0\n",
"their\t0\n",
"corresponding\t0\n",
"disc\t0\n",
",\t0\n",
"with\t0\n",
"morphological\t0\n",
"alterations\t0\n",
"typical\t0\n",
"of\t0\n",
"infectious\t1\n",
"spondylodiscitis\t2\n",
"D10\t2\n",
"-\t2\n",
"D11\t2\n",
".\t0\n",
"Three\t0\n",
"serial\t0\n",
"blood\t0\n",
"cultures\t0\n",
"were\t0\n",
"negative\t0\n",
".\t0\n",
"Samples\t0\n",
"obtained\t0\n",
"by\t0\n",
"aspiration\t0\n",
"of\t0\n",
"the\t0\n",
"D10\t0\n",
"-\t0\n",
"D11\t0\n",
"space\t0\n",
"showed\t0\n",
"gram\t0\n",
"-\t0\n",
"positive\t0\n",
"cocci\t0\n",
"chains\t0\n",
",\t0\n",
"which\t0\n",
"were\t0\n",
"subsequently\t0\n",
"recovered\t0\n",
"and\t0\n",
"typed\t0\n",
"as\t0\n",
"penicillin\t0\n",
"-\t0\n",
"sensitive\t0\n",
"Streptococcus\t0\n",
"pneumoniae\t0\n",
".\t0\n",
"Pleural\t0\n",
"fluid\t0\n",
"analysis\t0\n",
"showed\t0\n",
"pH\t0\n",
":\t0\n",
"7.55\t0\n",
";\t0\n",
"leucocytes\t0\n",
":\t0\n",
"8.4\t0\n",
"x\t0\n",
"109\t0\n",
"/\t0\n",
"L\t0\n",
"(\t0\n",
"58\t0\n",
"%\t0\n",
"neutrophils\t0\n",
",\t0\n",
"26\t0\n",
"%\t0\n",
"eosinophils\t0\n",
",\t0\n",
"16\t0\n",
"%\t0\n",
"lymphocytes\t0\n",
")\t0\n",
",\t0\n",
"protein\t0\n",
":\t0\n",
"48\t0\n",
"g\t0\n",
"/\t0\n",
"L\t0\n",
"(\t0\n",
"ratio\t0\n",
"to\t0\n",
"serum\t0\n",
"protein\t0\n",
":\t0\n",
"0.65\t0\n",
")\t0\n",
",\t0\n",
"glucose\t0\n",
":\t0\n",
"125\t0\n",
"mg\t0\n",
"/\t0\n",
"dl\t0\n",
",\t0\n",
"ADA\t0\n",
":\t0\n",
"25.92\t0\n",
"IU\t0\n",
"/\t0\n",
"ml\t0\n",
",\t0\n",
"LDH\t0\n",
":\t0\n",
"362\t0\n",
"U\t0\n",
"/\t0\n",
"L\t0\n",
"(\t0\n",
"pleural\t1\n",
"LDH\t0\n",
"/\t0\n",
"serum\t0\n",
"LDH\t0\n",
"ratio\t0\n",
":\t0\n",
"0.8\t0\n",
")\t0\n",
".\t0\n",
"Both\t0\n",
"auramine\t0\n",
"-\t0\n",
"rhodamine\t0\n",
"staining\t0\n",
"and\t0\n",
"Löwenstein\t0\n",
"-\t0\n",
"Jensen\t0\n",
"medium\t0\n",
"culture\t0\n",
"of\t0\n",
"pleural\t1\n",
"fluid\t0\n",
"were\t0\n",
"negative\t0\n",
"and\t0\n",
"cytology\t0\n",
"showed\t0\n",
"no\t0\n",
"evidence\t0\n",
"of\t0\n",
"neoplastic\t0\n",
"cells\t0\n",
".\t0\n",
"\n",
"\n",
"\t0\n",
"The\t0\n",
"patient\t0\n",
"was\t0\n",
"initially\t0\n",
"treated\t0\n",
"intravenously\t0\n",
"with\t0\n",
"amoxicillin\t0\n",
"+\t0\n",
"clavulanic\t0\n",
"acid\t0\n",
"(\t0\n",
"1\t0\n",
"g\t0\n",
"/\t0\n",
"200\t0\n",
"mg\t0\n",
",\t0\n",
"every\t0\n",
"8\t0\n",
"hours\t0\n",
")\t0\n",
".\t0\n",
"After\t0\n",
"21\t0\n",
"days\t0\n",
",\t0\n",
"she\t0\n",
"was\t0\n",
"switched\t0\n",
"to\t0\n",
"the\t0\n",
"oral\t0\n",
"route\t0\n",
"(\t0\n",
"875\t0\n",
"/\t0\n",
"125\t0\n",
"mg\t0\n",
",\t0\n",
"every\t0\n",
"8\t0\n",
"hours\t0\n",
")\t0\n",
"for\t0\n",
"6\t0\n",
"weeks\t0\n",
".\t0\n",
"The\t0\n",
"evolution\t0\n",
"was\t0\n",
"favourable\t0\n",
"and\t0\n",
"she\t0\n",
"was\t0\n",
"able\t0\n",
"to\t0\n",
"start\t0\n",
"walking\t0\n",
"with\t0\n",
"a\t0\n",
"dorsolumbar\t0\n",
"corset\t0\n",
"after\t0\n",
"the\t0\n",
"fourth\t0\n",
"week\t0\n",
".\t0\n",
"One\t0\n",
"month\t0\n",
"after\t0\n",
"the\t0\n",
"end\t0\n",
"of\t0\n",
"antibiotic\t0\n",
"therapy\t0\n",
",\t0\n",
"a\t0\n",
"control\t0\n",
"chest\t0\n",
"CT\t0\n",
"scan\t0\n",
"still\t0\n",
"showed\t0\n",
"a\t0\n",
"discrete\t0\n",
"pleural\t1\n",
"effusion\t2\n",
",\t0\n",
"but\t0\n",
"the\t0\n",
"patient\t0\n",
"had\t0\n",
"only\t0\n",
"mild\t0\n",
"mechanical\t0\n",
"dorsalgia\t0\n",
",\t0\n",
"her\t0\n",
"ESR\t0\n",
"had\t0\n",
"decreased\t0\n",
"to\t0\n",
"21\t0\n",
"mm\t0\n",
"/\t0\n",
"1\t0\n",
"h\t0\n",
"and\t0\n",
"her\t0\n",
"CRP\t0\n",
"was\t0\n",
"2.4\t0\n",
"mg\t0\n",
"/\t0\n",
"L.\t0\n",
"Outpatient\t0\n",
"follow\t0\n",
"-\t0\n",
"up\t0\n",
"continued\t0\n",
"for\t0\n",
"a\t0\n",
"further\t0\n",
"three\t0\n",
"years\t0\n",
",\t0\n",
"during\t0\n",
"which\t0\n",
"time\t0\n",
"the\t0\n",
"evolution\t0\n",
"was\t0\n",
"favourable\t0\n",
"and\t0\n",
"a\t0\n",
"D10\t0\n",
"-\t0\n",
"D11\t0\n",
"vertebral\t0\n",
"block\t0\n",
"was\t0\n",
"formed\t0\n",
".\t0\n",
"\n",
"\n",
"\n",
"\t0\n"
]
}
],
"source": [
"j = 0\n",
"for i in range(len(HCs_tokenized[j])):\n",
" print(str(HCs_tokenized[j][i]) + \"\\t\" + str(labels_tokenized[j][i]))"
]
},
{
"cell_type": "markdown",
"source": [
"# Validating tokenization and alignment with the BIO tags."
],
"metadata": {
"id": "laU64q79UYZy"
}
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "jcJBsudYKpLc",
"outputId": "13ea2860-19b7-4ee1-ddf4-ce607a60e80f"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Everything is aligned!\n"
]
}
],
"source": [
"flag = 0\n",
"for st, lt in zip(HCs_tokenized, labels_tokenized):\n",
" if len(st) != len(lt):\n",
" print(st)\n",
" print(lt)\n",
" flag = 1\n",
"if flag==0:\n",
" print(\"Everything is aligned!\")"
]
},
{
"cell_type": "markdown",
"source": [
"# Sentence tokenization"
],
"metadata": {
"id": "lvR3V8qXUbvR"
}
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"id": "-nHHwKmUKpLd"
},
"outputs": [],
"source": [
"sent_tokenized = []\n",
"label_sent_tokenized = []\n",
"for ht, lht in zip(HCs_tokenized, labels_tokenized):\n",
" st = []; lbst = []\n",
" for h, l in zip(ht,lht):\n",
" if h != \".\":\n",
" st.append(h)\n",
" lbst.append(l)\n",
" else:\n",
" st.append(\".\")\n",
" lbst.append(0)\n",
" sent_tokenized.append(st)\n",
" label_sent_tokenized.append(lbst)\n",
" st = []; lbst = []"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "8Lo6zzhGKpLd",
"outputId": "6d68e803-7ad4-4970-b4bf-b34be7fbdd37"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"11668"
]
},
"metadata": {},
"execution_count": 24
}
],
"source": [
"len(sent_tokenized)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5Ei7TrATKpLd",
"outputId": "93b623a3-ac12-4500-b6f4-506972dcad9f"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['A',\n",
" '73',\n",
" '-',\n",
" 'year',\n",
" '-',\n",
" 'old',\n",
" 'patient',\n",
" 'with',\n",
" 'a',\n",
" 'history',\n",
" 'of',\n",
" 'arterial',\n",
" 'hypertension',\n",
" 'and',\n",
" 'polyarthrosis',\n",
" 'presented',\n",
" 'to',\n",
" 'the',\n",
" 'emergency',\n",
" 'department',\n",
" 'with',\n",
" 'abdominal',\n",
" 'distension',\n",
" 'and',\n",
" 'pain',\n",
" 'associated',\n",
" 'with',\n",
" 'constipation',\n",
" 'and',\n",
" 'febrile',\n",
" 'fever',\n",
" '.']"
]
},
"metadata": {},
"execution_count": 25
}
],
"source": [
"sent_tokenized[0]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "KistaXEmKpLe",
"outputId": "b07ce5d3-df79-474e-a7fb-983632bb38d3"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"11668"
]
},
"metadata": {},
"execution_count": 26
}
],
"source": [
"len(label_sent_tokenized)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "m9qR1AxbKpLe",
"outputId": "66600bb9-dcae-4717-9cc9-9422115b4e2e"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 2,\n",
" 0,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0]"
]
},
"metadata": {},
"execution_count": 27
}
],
"source": [
"label_sent_tokenized[0]"
]
},
{
"cell_type": "markdown",
"source": [
"# Disease mentions identification as a Token classification problem"
],
"metadata": {
"id": "l5-_fyqPUh3b"
}
},
{
"cell_type": "markdown",
"source": [
"# Building the Dataset"
],
"metadata": {
"id": "098UDE8VUjXf"
}
},
{
"cell_type": "markdown",
"source": [
"## Case as a whole is given as input"
],
"metadata": {
"id": "8FT7wAx4VeaU"
}
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"id": "5qVPJ2LVKpLe"
},
"outputs": [],
"source": [
"dic = {\"tokens\": HCs_tokenized, \"ner_tags\": labels_tokenized} #For the whole clinical case. We used this option for our paper.\n",
"#dic = {\"tokens\": sent_tokenized, \"ner_tags\": label_sent_tokenized} #Use this option if you want to check the model performance with sentences tokenized by \". \" b"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"id": "UzXp6PO2KpLe"
},
"outputs": [],
"source": [
"dataset = Dataset.from_dict(dic)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "NhqH78gAKpLf",
"outputId": "e3fe5c5f-764c-4bf8-c599-6bda653a4944"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Dataset({\n",
" features: ['tokens', 'ner_tags'],\n",
" num_rows: 741\n",
"})"
]
},
"metadata": {},
"execution_count": 30
}
],
"source": [
"dataset"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"id": "jspsCq2OKpLf"
},
"outputs": [],
"source": [
"#For training, validation, and test partitions\n",
"\"\"\"\n",
"#Train, val, test partitions\n",
"train_test = dataset.train_test_split()\n",
"test_val = train_test['test'].train_test_split()\n",
"raw_datasets = DatasetDict({\n",
" 'train': train_test['train'],\n",
" 'validation': test_val['train'],\n",
" 'test': test_val['test']\n",
" })\n",
"\"\"\"\n",
"\n",
"#Just for training and validation partitions\n",
"train_test = dataset.train_test_split()\n",
"raw_datasets = DatasetDict({\n",
" 'train': train_test['train'],\n",
" 'validation': train_test['test']\n",
" })"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "isqyq9VIKpLf",
"outputId": "7539fb10-1ab0-43cd-d7a5-bdb30514b891"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"DatasetDict({\n",
" train: Dataset({\n",
" features: ['tokens', 'ner_tags'],\n",
" num_rows: 555\n",
" })\n",
" validation: Dataset({\n",
" features: ['tokens', 'ner_tags'],\n",
" num_rows: 186\n",
" })\n",
"})"
]
},
"metadata": {},
"execution_count": 32
}
],
"source": [
"raw_datasets"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "pEJhTKi8KpLf",
"outputId": "fe3f3c2b-226c-42eb-9674-f69fce8a61e9"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 2,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0]"
]
},
"metadata": {},
"execution_count": 33
}
],
"source": [
"raw_datasets[\"train\"][0][\"ner_tags\"]\n",
"#raw_datasets[\"train\"][0][\"pos_tags\"]\n",
"#raw_datasets[\"train\"][0][\"chunk_tags\"]"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gtrDybrCKpLf",
"outputId": "1217e93b-928c-4db1-bd22-8f1e3c2ebf2c"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Dataset({\n",
" features: ['tokens', 'ner_tags'],\n",
" num_rows: 555\n",
"})"
]
},
"metadata": {},
"execution_count": 34
}
],
"source": [
"raw_datasets['train']"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "QjdG8yi0KpLf",
"outputId": "20bc5bed-36bf-4e05-f80b-f75638f5196b"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['O', 'B', 'I']"
]
},
"metadata": {},
"execution_count": 35
}
],
"source": [
"label_names = ['O','B','I']\n",
"label_names"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "9kGXdiCsKpLg",
"outputId": "754c7613-c7c3-46fc-a109-d8a67d6ab072"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"This is a 70 - year - old male patient , who was admitted to the emergency department of the Hospital Pablo Tobón Uribe , with symptoms of approximately one hour of evolution consisting of chest tightness , general malaise , asthenia and diaphoresis ; which began after having ingested 100 mg of sildenafil , denies ingestion of another sexual stimulant or cocaine and without sexual intercourse after its consumption . The patient 's only clinical history was arterial hypertension , pharmacologically controlled , and he denies previous episodes of angina or nitrate consumption . The clinical examination and vital signs were normal ; however , after the initial assessment he presented cardiorespiratory arrest secondary to ventricular fibrillation with response to a single defibrillation of 200 joules . \n",
" The initial electrocardiogram showed ST - segment elevation in the inferior ( II , III and aVF ) and anterior ( V2 - V4 ) leads with reciprocal changes in aVL , with no electrocardiographic extension to the right ventricle . \n",
"\n",
" Cardiac enzymes on admission revealed a creatine kinase ( CK ) of 170 and a creatine phosphokinase - MB fraction ( CK - MB ) of 6 . Electrolytes , coagulation tests and blood cell counts were normal . \n",
" Initial management was with aspirin 100 mg , lovastatin 40 mg daily , metoprolol 25 mg every 12 hours , enoxaparin 60 mg every 12 hours , oxygen at 3 lt / min and streptokinase 1'500,000 units administered over 30 minutes . No changes secondary to reperfusion were demonstrated . The patient was transferred to the intensive care unit , where episodes of complete A - V block with spontaneous resolution were documented during the first hours of evolution . \n",
" The EKG taken at 24 hours of evolution revealed QS in the inferior face and a late progression of the R wave in the anterior face . Enzyme monitoring showed increased CK and MB fraction at 6 hours ( 4476 and 165 ) and 12 hours ( 3839 and 136 ) . \n",
" The next day coronary angiography showed diffuse disease of the anterior descending artery with 50 % lesion in the distal third and 40 % lesion in the proximal third of the first diagonal branch . The circumflex artery had a 50 % lesion in the middle third and diffuse disease of its obtuse marginal branches . The right coronary artery had an irregular lesion suggestive of a partially resolved thrombus producing a maximum stenosis of 50 % ; distally the posterior descending artery had two 40 % lesions . \n",
"\n",
" The patient evolved satisfactorily without further complications and pain - free . He was discharged for outpatient follow - up . \n",
"\n",
"\n",
" \n",
"O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O B I O O O O O O O O O O B O O O O O O O O O O O O O O O O O O O O O B I I I I I O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O B I I I I O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O B I I I I I I I I I I I I I I I I I I O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O \n"
]
}
],
"source": [
"words = raw_datasets[\"train\"][0][\"tokens\"]\n",
"labels = [int(n) for n in raw_datasets[\"train\"][0][\"ner_tags\"]]\n",
"#labels = raw_datasets[\"train\"][0][\"pos_tags\"]\n",
"#labels = raw_datasets[\"train\"][0][\"chunk_tags\"]\n",
"line1 = \"\"\n",
"line2 = \"\"\n",
"for word, label in zip(words, labels):\n",
" full_label = label_names[label]\n",
" max_length = max(len(word), len(full_label))\n",
" line1 += word + \" \" * (max_length - len(word) + 1)\n",
" line2 += full_label + \" \" * (max_length - len(full_label) + 1)\n",
"\n",
"print(line1)\n",
"print(line2)"
]
},
{
"cell_type": "markdown",
"source": [
"## Helper Functions"
],
"metadata": {
"id": "0ABWC5YnTZzp"
}
},
{
"cell_type": "code",
"source": [
"def align_labels_with_tokens(labels, word_ids):\n",
" new_labels = []\n",
" current_word = None\n",
" for word_id in word_ids:\n",
" if word_id != current_word:\n",
" # Start of a new word!\n",
" current_word = word_id\n",
" label = -100 if word_id is None else labels[word_id]\n",
" new_labels.append(label)\n",
" elif word_id is None:\n",
" # Special token\n",
" new_labels.append(-100)\n",
" else:\n",
" # Same word as previous token\n",
" label = labels[word_id]\n",
" # If the label is B-XXX we change it to I-XXX\n",
" if label % 2 == 1:\n",
" label += 1\n",
" new_labels.append(label)\n",
"\n",
" return new_labels"
],
"metadata": {
"id": "LQUKQDvxTNHn"
},
"execution_count": 37,
"outputs": []
},
{
"cell_type": "code",
"source": [
"def tokenize_and_align_labels(examples):\n",
" tokenized_inputs = tokenizer(\n",
" examples[\"tokens\"], truncation=True, is_split_into_words=True\n",
" )\n",
" all_labels = examples[\"ner_tags\"]\n",
" new_labels = []\n",
" for i, labels in enumerate(all_labels):\n",
" word_ids = tokenized_inputs.word_ids(i)\n",
" new_labels.append(align_labels_with_tokens(labels, word_ids))\n",
"\n",
" tokenized_inputs[\"labels\"] = new_labels\n",
" return tokenized_inputs"
],
"metadata": {
"id": "qT-6oXUoTYah"
},
"execution_count": 38,
"outputs": []
},
{
"cell_type": "code",
"source": [
"metric = load_metric(\"seqeval\")\n",
"def compute_metrics(eval_preds):\n",
" logits, labels = eval_preds\n",
" predictions = np.argmax(logits, axis=-1)\n",
"\n",
" # Remove ignored index (special tokens) and convert to labels\n",
" true_labels = [[label_names[l] for l in label if l != -100] for label in labels]\n",
" try:\n",
" true_predictions = [\n",
" [label_names[p] for (p, l) in zip(prediction, label) if l != -100]\n",
" for prediction, label in zip(predictions, labels)\n",
" ]\n",
" except:\n",
" true_predictions = []\n",
" for prediction, label in zip(predictions, labels):\n",
" label_list = []\n",
" for (p, l) in zip(prediction, label):\n",
" if l != -100:\n",
" if p not in range(len(label_names)):\n",
" p = 0\n",
" \n",
" label_list.append(label_names[p])\n",
" true_predictions.append(label_list)\n",
"\n",
" all_metrics = metric.compute(predictions=true_predictions, references=true_labels)\n",
" return {\n",
" \"precision\": all_metrics[\"overall_precision\"],\n",
" \"recall\": all_metrics[\"overall_recall\"],\n",
" \"f1\": all_metrics[\"overall_f1\"],\n",
" \"accuracy\": all_metrics[\"overall_accuracy\"],\n",
" }"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 104,
"referenced_widgets": [
"b3fe12ca95e84b198d16bdb4d20f9ad9",
"1b7f8f1786394c01bad4a8589ad16513",
"70e437b3ba294189b4799c6607532ebd",
"0fb47d91dbf9497cac1ffc1c5dfd4519",
"9cfec0f21c0a459f9f5888c389a6a479",
"ef66098fb5f748eabe11abc3fe4ad54d",
"563d8b35192240be960bc08909984119",
"b94385d1423e47f5a9e2351bf873c3e0",
"e1b6e7774bc94a87ad23fb53d6c9b985",
"a0b523772cf04a85b0ac000cc9a83c67",
"71a3f1b2112344ea81721e59cce14cec"
]
},
"id": "EZMRI9ATTzc9",
"outputId": "14e38b3a-86a7-49b1-817f-92a7304747a2"
},
"execution_count": 39,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:1: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading builder script: 0%| | 0.00/2.47k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "b3fe12ca95e84b198d16bdb4d20f9ad9"
}
},
"metadata": {}
}
]
},
{
"cell_type": "markdown",
"source": [
"# Loading BERT as a pre-trained model"
],
"metadata": {
"id": "6D0P7PztVnCP"
}
},
{
"cell_type": "markdown",
"source": [
"## Model 1 - d4data/biomedical-ner-all\n",
"\n",
"Complete document wise tokenization"
],
"metadata": {
"id": "mmPReh2mSux9"
}
},
{
"cell_type": "code",
"source": [
"model_checkpoint = \"d4data/biomedical-ner-all\"\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, add_prefix_space=True)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 145,
"referenced_widgets": [
"a275c54cbefb4438a3015080e8b57999",
"a4696c167a3247bd8fd0727e0556463a",
"afa79c37c031491da9e229c637d80cc4",
"5ddf799b2fd94edc9949d36450a2d5e9",
"592af11564074af19e40bce6680ed7f1",
"125b61b8e80d4192a6f19d43ba4797dc",
"d9ff16ada2d94eb7a1adc70e5265ff2d",
"f9bd10de9e2845f08100a29293b92d1c",
"c28349dbeccc4124b583e0eeec004e6b",
"ff2cf349b07442bd9812dd8c7e82e59b",
"414d466fed0b42378d8b38f10c720eba",
"e432f1e3e5c54358a321a21e9c7aad1f",
"cf931d70dc1a4d2ba5f10dba7bf90ece",
"58f1edc459ef4f5bab25544474897db3",
"8894005504364c36964d283cf58bb223",
"78db41a453ce4ff4884960c615147331",
"fe8d877f0fc1417baad9838094045475",
"74bfdb85ed55436f8c12bf9b25375533",
"e813e2a1cb7248b7a8c404d55e4fb248",
"cb3c438fb3a6412d80b5ba673a6455cb",
"bb189f5bc189462cad4824a1c30335c0",
"e98b7218049f4310951a1608c52c14e0",
"3dd1f27ff0d24a1294534ff7e69a7abb",
"4128d82e19f14e9d9be5416ebc974d0d",
"6676a80dc293456ea7aed4ce3e281d83",
"55a7a4c336884f26a53292d559a06ff8",
"d27469698b1e4ad1ae74ced6f7c3942d",
"d201490a05c049d38b087008aac0a400",
"703d715a4ef64c4e93cc6496f5340451",
"41d861058e3e458e949f1f3d92623217",
"f4d9343bd31d47b1b3dcf0494825be2d",
"b75e2a47db2b47dd8740f77b337c308f",
"d8849516ccb44011a7f9e7e745b30c60",
"ae5928c8da4243fba06ae9bf5086ba31",
"e613455bcbb24e36a31666acd83d7b24",
"bac45a33f9b444a1985ef56a9be85c52",
"91ac0673e600400f904b1b10deb86cee",
"289f23dd30814993afde0f5e987fdd9e",
"8e72912c0e434060ac30517a98d07a9e",
"ede252ab2cee4ffbbc2f5519373d1e97",
"f2da21cc1007475ca0233a9e5d146d65",
"92fd7c43f87142d1bbd05f89ba3bfe39",
"b58c9bfa30b3421496adb52e082cb50a",
"6cbfa925d26e47139365d10b9b28d96a"
]
},
"id": "RfwVBXFAS1Dc",
"outputId": "3a7c9806-3828-4ab7-c73d-d0e8039e1417"
},
"execution_count": 40,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading: 0%| | 0.00/373 [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "a275c54cbefb4438a3015080e8b57999"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading: 0%| | 0.00/232k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "e432f1e3e5c54358a321a21e9c7aad1f"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading: 0%| | 0.00/711k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "3dd1f27ff0d24a1294534ff7e69a7abb"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading: 0%| | 0.00/125 [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "ae5928c8da4243fba06ae9bf5086ba31"
}
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"tokenizer.is_fast"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "a9DegC0yS_Gp",
"outputId": "083e5505-007e-43c8-fe31-3b202c84cc76"
},
"execution_count": 41,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 41
}
]
},
{
"cell_type": "code",
"source": [
"inputs = tokenizer(raw_datasets[\"train\"][0][\"tokens\"], is_split_into_words=True)\n",
"inputs.tokens()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "v-7VQWw5TB-c",
"outputId": "0922ac3e-61af-43ae-f3a8-29f291e7c19b"
},
"execution_count": 42,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Token indices sequence length is longer than the specified maximum sequence length for this model (567 > 512). Running this sequence through the model will result in indexing errors\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['[CLS]',\n",
" 'this',\n",
" 'is',\n",
" 'a',\n",
" '70',\n",
" '-',\n",
" 'year',\n",
" '-',\n",
" 'old',\n",
" 'male',\n",
" 'patient',\n",
" ',',\n",
" 'who',\n",
" 'was',\n",
" 'admitted',\n",
" 'to',\n",
" 'the',\n",
" 'emergency',\n",
" 'department',\n",
" 'of',\n",
" 'the',\n",
" 'hospital',\n",
" 'pablo',\n",
" 'to',\n",
" '##bon',\n",
" 'ur',\n",
" '##ibe',\n",
" ',',\n",
" 'with',\n",
" 'symptoms',\n",
" 'of',\n",
" 'approximately',\n",
" 'one',\n",
" 'hour',\n",
" 'of',\n",
" 'evolution',\n",
" 'consisting',\n",
" 'of',\n",
" 'chest',\n",
" 'tight',\n",
" '##ness',\n",
" ',',\n",
" 'general',\n",
" 'mala',\n",
" '##ise',\n",
" ',',\n",
" 'as',\n",
" '##the',\n",
" '##nia',\n",
" 'and',\n",
" 'dia',\n",
" '##ph',\n",
" '##ores',\n",
" '##is',\n",
" ';',\n",
" 'which',\n",
" 'began',\n",
" 'after',\n",
" 'having',\n",
" 'ing',\n",
" '##ested',\n",
" '100',\n",
" 'mg',\n",
" 'of',\n",
" 'si',\n",
" '##lden',\n",
" '##af',\n",
" '##il',\n",
" ',',\n",
" 'denies',\n",
" 'ing',\n",
" '##est',\n",
" '##ion',\n",
" 'of',\n",
" 'another',\n",
" 'sexual',\n",
" 'st',\n",
" '##im',\n",
" '##ula',\n",
" '##nt',\n",
" 'or',\n",
" 'cocaine',\n",
" 'and',\n",
" 'without',\n",
" 'sexual',\n",
" 'intercourse',\n",
" 'after',\n",
" 'its',\n",
" 'consumption',\n",
" '.',\n",
" 'the',\n",
" 'patient',\n",
" \"'\",\n",
" 's',\n",
" 'only',\n",
" 'clinical',\n",
" 'history',\n",
" 'was',\n",
" 'arterial',\n",
" 'hyper',\n",
" '##tension',\n",
" ',',\n",
" 'ph',\n",
" '##arm',\n",
" '##aco',\n",
" '##logical',\n",
" '##ly',\n",
" 'controlled',\n",
" ',',\n",
" 'and',\n",
" 'he',\n",
" 'denies',\n",
" 'previous',\n",
" 'episodes',\n",
" 'of',\n",
" 'ang',\n",
" '##ina',\n",
" 'or',\n",
" 'nitrate',\n",
" 'consumption',\n",
" '.',\n",
" 'the',\n",
" 'clinical',\n",
" 'examination',\n",
" 'and',\n",
" 'vital',\n",
" 'signs',\n",
" 'were',\n",
" 'normal',\n",
" ';',\n",
" 'however',\n",
" ',',\n",
" 'after',\n",
" 'the',\n",
" 'initial',\n",
" 'assessment',\n",
" 'he',\n",
" 'presented',\n",
" 'card',\n",
" '##ior',\n",
" '##es',\n",
" '##pi',\n",
" '##rator',\n",
" '##y',\n",
" 'arrest',\n",
" 'secondary',\n",
" 'to',\n",
" 'vent',\n",
" '##ric',\n",
" '##ular',\n",
" 'fi',\n",
" '##bri',\n",
" '##llation',\n",
" 'with',\n",
" 'response',\n",
" 'to',\n",
" 'a',\n",
" 'single',\n",
" 'def',\n",
" '##ib',\n",
" '##rill',\n",
" '##ation',\n",
" 'of',\n",
" '200',\n",
" 'jo',\n",
" '##ules',\n",
" '.',\n",
" 'the',\n",
" 'initial',\n",
" 'electro',\n",
" '##card',\n",
" '##io',\n",
" '##gram',\n",
" 'showed',\n",
" 'st',\n",
" '-',\n",
" 'segment',\n",
" 'elevation',\n",
" 'in',\n",
" 'the',\n",
" 'inferior',\n",
" '(',\n",
" 'ii',\n",
" ',',\n",
" 'iii',\n",
" 'and',\n",
" 'av',\n",
" '##f',\n",
" ')',\n",
" 'and',\n",
" 'anterior',\n",
" '(',\n",
" 'v',\n",
" '##2',\n",
" '-',\n",
" 'v',\n",
" '##4',\n",
" ')',\n",
" 'leads',\n",
" 'with',\n",
" 'reciprocal',\n",
" 'changes',\n",
" 'in',\n",
" 'av',\n",
" '##l',\n",
" ',',\n",
" 'with',\n",
" 'no',\n",
" 'electro',\n",
" '##card',\n",
" '##io',\n",
" '##graphic',\n",
" 'extension',\n",
" 'to',\n",
" 'the',\n",
" 'right',\n",
" 'vent',\n",
" '##ric',\n",
" '##le',\n",
" '.',\n",
" 'cardiac',\n",
" 'enzymes',\n",
" 'on',\n",
" 'admission',\n",
" 'revealed',\n",
" 'a',\n",
" 'cr',\n",
" '##ea',\n",
" '##tine',\n",
" 'kinase',\n",
" '(',\n",
" 'ck',\n",
" ')',\n",
" 'of',\n",
" '170',\n",
" 'and',\n",
" 'a',\n",
" 'cr',\n",
" '##ea',\n",
" '##tine',\n",
" 'ph',\n",
" '##os',\n",
" '##ph',\n",
" '##oki',\n",
" '##nas',\n",
" '##e',\n",
" '-',\n",
" 'mb',\n",
" 'fraction',\n",
" '(',\n",
" 'ck',\n",
" '-',\n",
" 'mb',\n",
" ')',\n",
" 'of',\n",
" '6',\n",
" '.',\n",
" 'electro',\n",
" '##ly',\n",
" '##tes',\n",
" ',',\n",
" 'coa',\n",
" '##gul',\n",
" '##ation',\n",
" 'tests',\n",
" 'and',\n",
" 'blood',\n",
" 'cell',\n",
" 'counts',\n",
" 'were',\n",
" 'normal',\n",
" '.',\n",
" 'initial',\n",
" 'management',\n",
" 'was',\n",
" 'with',\n",
" 'as',\n",
" '##pi',\n",
" '##rin',\n",
" '100',\n",
" 'mg',\n",
" ',',\n",
" 'lo',\n",
" '##vas',\n",
" '##tat',\n",
" '##in',\n",
" '40',\n",
" 'mg',\n",
" 'daily',\n",
" ',',\n",
" 'met',\n",
" '##op',\n",
" '##rol',\n",
" '##ol',\n",
" '25',\n",
" 'mg',\n",
" 'every',\n",
" '12',\n",
" 'hours',\n",
" ',',\n",
" 'en',\n",
" '##ox',\n",
" '##apa',\n",
" '##rin',\n",
" '60',\n",
" 'mg',\n",
" 'every',\n",
" '12',\n",
" 'hours',\n",
" ',',\n",
" 'oxygen',\n",
" 'at',\n",
" '3',\n",
" 'lt',\n",
" '/',\n",
" 'min',\n",
" 'and',\n",
" 'st',\n",
" '##re',\n",
" '##pt',\n",
" '##oki',\n",
" '##nas',\n",
" '##e',\n",
" '1',\n",
" \"'\",\n",
" '500',\n",
" ',',\n",
" '000',\n",
" 'units',\n",
" 'administered',\n",
" 'over',\n",
" '30',\n",
" 'minutes',\n",
" '.',\n",
" 'no',\n",
" 'changes',\n",
" 'secondary',\n",
" 'to',\n",
" 'rep',\n",
" '##er',\n",
" '##fusion',\n",
" 'were',\n",
" 'demonstrated',\n",
" '.',\n",
" 'the',\n",
" 'patient',\n",
" 'was',\n",
" 'transferred',\n",
" 'to',\n",
" 'the',\n",
" 'intensive',\n",
" 'care',\n",
" 'unit',\n",
" ',',\n",
" 'where',\n",
" 'episodes',\n",
" 'of',\n",
" 'complete',\n",
" 'a',\n",
" '-',\n",
" 'v',\n",
" 'block',\n",
" 'with',\n",
" 'spontaneous',\n",
" 'resolution',\n",
" 'were',\n",
" 'documented',\n",
" 'during',\n",
" 'the',\n",
" 'first',\n",
" 'hours',\n",
" 'of',\n",
" 'evolution',\n",
" '.',\n",
" 'the',\n",
" 'ek',\n",
" '##g',\n",
" 'taken',\n",
" 'at',\n",
" '24',\n",
" 'hours',\n",
" 'of',\n",
" 'evolution',\n",
" 'revealed',\n",
" 'q',\n",
" '##s',\n",
" 'in',\n",
" 'the',\n",
" 'inferior',\n",
" 'face',\n",
" 'and',\n",
" 'a',\n",
" 'late',\n",
" 'progression',\n",
" 'of',\n",
" 'the',\n",
" 'r',\n",
" 'wave',\n",
" 'in',\n",
" 'the',\n",
" 'anterior',\n",
" 'face',\n",
" '.',\n",
" 'enzyme',\n",
" 'monitoring',\n",
" 'showed',\n",
" 'increased',\n",
" 'ck',\n",
" 'and',\n",
" 'mb',\n",
" 'fraction',\n",
" 'at',\n",
" '6',\n",
" 'hours',\n",
" '(',\n",
" '44',\n",
" '##7',\n",
" '##6',\n",
" 'and',\n",
" '165',\n",
" ')',\n",
" 'and',\n",
" '12',\n",
" 'hours',\n",
" '(',\n",
" '38',\n",
" '##39',\n",
" 'and',\n",
" '136',\n",
" ')',\n",
" '.',\n",
" 'the',\n",
" 'next',\n",
" 'day',\n",
" 'corona',\n",
" '##ry',\n",
" 'ang',\n",
" '##iography',\n",
" 'showed',\n",
" 'diffuse',\n",
" 'disease',\n",
" 'of',\n",
" 'the',\n",
" 'anterior',\n",
" 'descending',\n",
" 'artery',\n",
" 'with',\n",
" '50',\n",
" '%',\n",
" 'les',\n",
" '##ion',\n",
" 'in',\n",
" 'the',\n",
" 'distal',\n",
" 'third',\n",
" 'and',\n",
" '40',\n",
" '%',\n",
" 'les',\n",
" '##ion',\n",
" 'in',\n",
" 'the',\n",
" 'pro',\n",
" '##xi',\n",
" '##mal',\n",
" 'third',\n",
" 'of',\n",
" 'the',\n",
" 'first',\n",
" 'diagonal',\n",
" 'branch',\n",
" '.',\n",
" 'the',\n",
" 'ci',\n",
" '##rc',\n",
" '##um',\n",
" '##fle',\n",
" '##x',\n",
" 'artery',\n",
" 'had',\n",
" 'a',\n",
" '50',\n",
" '%',\n",
" 'les',\n",
" '##ion',\n",
" 'in',\n",
" 'the',\n",
" 'middle',\n",
" 'third',\n",
" 'and',\n",
" 'diffuse',\n",
" 'disease',\n",
" 'of',\n",
" 'its',\n",
" 'ob',\n",
" '##tus',\n",
" '##e',\n",
" 'marginal',\n",
" 'branches',\n",
" '.',\n",
" 'the',\n",
" 'right',\n",
" 'corona',\n",
" '##ry',\n",
" 'artery',\n",
" 'had',\n",
" 'an',\n",
" 'irregular',\n",
" 'les',\n",
" '##ion',\n",
" 'suggest',\n",
" '##ive',\n",
" 'of',\n",
" 'a',\n",
" 'partially',\n",
" 'resolved',\n",
" 'th',\n",
" '##rom',\n",
" '##bus',\n",
" 'producing',\n",
" 'a',\n",
" 'maximum',\n",
" 'ste',\n",
" '##nosis',\n",
" 'of',\n",
" '50',\n",
" '%',\n",
" ';',\n",
" 'distal',\n",
" '##ly',\n",
" 'the',\n",
" 'posterior',\n",
" 'descending',\n",
" 'artery',\n",
" 'had',\n",
" 'two',\n",
" '40',\n",
" '%',\n",
" 'lesions',\n",
" '.',\n",
" 'the',\n",
" 'patient',\n",
" 'evolved',\n",
" 'sat',\n",
" '##is',\n",
" '##fa',\n",
" '##ctor',\n",
" '##ily',\n",
" 'without',\n",
" 'further',\n",
" 'complications',\n",
" 'and',\n",
" 'pain',\n",
" '-',\n",
" 'free',\n",
" '.',\n",
" 'he',\n",
" 'was',\n",
" 'discharged',\n",
" 'for',\n",
" 'out',\n",
" '##patient',\n",
" 'follow',\n",
" '-',\n",
" 'up',\n",
" '.',\n",
" '[SEP]']"
]
},
"metadata": {},
"execution_count": 42
}
]
},
{
"cell_type": "code",
"source": [
"labels = raw_datasets[\"train\"][0][\"ner_tags\"]\n",
"word_ids = inputs.word_ids()\n",
"print(labels)\n",
"print(align_labels_with_tokens(labels, word_ids))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "w77KW9-3TD4u",
"outputId": "eae68b43-ff6a-48cb-a380-930f156c24c6"
},
"execution_count": 43,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"[-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -100]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"tokenized_datasets = raw_datasets.map(\n",
" tokenize_and_align_labels,\n",
" batched=True,\n",
" remove_columns=raw_datasets[\"train\"].column_names,\n",
")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 81,
"referenced_widgets": [
"d161016f9fea41e6b27eb537c12d0703",
"f315bcfdc76848cb8e851a2698e0248b",
"ef152955607540f2a7d38bf9e2207eec",
"456c36425ac94dc294f8402c07668a51",
"4d305d32efdf4b639e65e816a7132597",
"5c3a10b039c344509be9867ca40a8472",
"6663eac35b7a4043b97edb90a555e3d9",
"30bce58edba74043abc1a2625c492d4a",
"d4b6dbbad9c946ed99b6c6e587bfb6da",
"8976a59e4ea049088f92a37f7547e16e",
"6a689955d9b3463abaaaa03b62d3cf69",
"9d65a59161cd401aad05f4a52d51c724",
"6f244b91a3884eb5b0fbd577ed5d1710",
"2b6ad660dd1f4c78855433118b9fb61e",
"101fa9a9581a46d8b1e0951f03796740",
"7ffe4378bc7b410780780dd51d0705ea",
"77c422e831944566a6529da37645ef6d",
"9a88121d0138438980f1c7e4341f480a",
"52600cdbf4804b148e02724ae4902de5",
"11f16a0c34e64d6494ac1d2550d18f8f",
"645616ac236e479c8303a56100d26d51",
"859e35e323f0407fbdea9eb7ae953742"
]
},
"id": "5BPyKS51TWGK",
"outputId": "f7ada6c5-8860-40d3-f32b-4b79abe14ae8"
},
"execution_count": 44,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
" 0%| | 0/1 [00:00<?, ?ba/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "d161016f9fea41e6b27eb537c12d0703"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
" 0%| | 0/1 [00:00<?, ?ba/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "9d65a59161cd401aad05f4a52d51c724"
}
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)"
],
"metadata": {
"id": "_EZvP3kyTfms"
},
"execution_count": 45,
"outputs": []
},
{
"cell_type": "code",
"source": [
"batch = data_collator([tokenized_datasets[\"train\"][i] for i in range(2)])\n",
"batch[\"labels\"]"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "IkQdm1ODTiWJ",
"outputId": "5d18c54e-c5aa-4d89-ed8a-e1b7664350f8"
},
"execution_count": 46,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"tensor([[-100, 0, 0, ..., 0, 0, -100],\n",
" [-100, 0, 0, ..., 0, 0, -100]])"
]
},
"metadata": {},
"execution_count": 46
}
]
},
{
"cell_type": "code",
"source": [
"id2label = {str(i): label for i, label in enumerate(label_names)}\n",
"label2id = {v: k for k, v in id2label.items()}"
],
"metadata": {
"id": "QPom2dyaTkdf"
},
"execution_count": 47,
"outputs": []
},
{
"cell_type": "code",
"source": [
"model = AutoModelForTokenClassification.from_pretrained( \n",
" model_checkpoint\n",
")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "LSRqpzXDTnpt",
"outputId": "f87c31de-e40d-49f8-81a1-53a2a9905083"
},
"execution_count": 55,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--d4data--biomedical-ner-all/snapshots/e87917020da1384aed6e93b1b46d68771f65ddab/config.json\n",
"Model config DistilBertConfig {\n",
" \"_name_or_path\": \"d4data/biomedical-ner-all\",\n",
" \"activation\": \"gelu\",\n",
" \"architectures\": [\n",
" \"DistilBertForTokenClassification\"\n",
" ],\n",
" \"attention_dropout\": 0.1,\n",
" \"dim\": 768,\n",
" \"dropout\": 0.1,\n",
" \"hidden_dim\": 3072,\n",
" \"id2label\": {\n",
" \"0\": \"O\",\n",
" \"1\": \"B-Activity\",\n",
" \"2\": \"B-Administration\",\n",
" \"3\": \"B-Age\",\n",
" \"4\": \"B-Area\",\n",
" \"5\": \"B-Biological_attribute\",\n",
" \"6\": \"B-Biological_structure\",\n",
" \"7\": \"B-Clinical_event\",\n",
" \"8\": \"B-Color\",\n",
" \"9\": \"B-Coreference\",\n",
" \"10\": \"B-Date\",\n",
" \"11\": \"B-Detailed_description\",\n",
" \"12\": \"B-Diagnostic_procedure\",\n",
" \"13\": \"B-Disease_disorder\",\n",
" \"14\": \"B-Distance\",\n",
" \"15\": \"B-Dosage\",\n",
" \"16\": \"B-Duration\",\n",
" \"17\": \"B-Family_history\",\n",
" \"18\": \"B-Frequency\",\n",
" \"19\": \"B-Height\",\n",
" \"20\": \"B-History\",\n",
" \"21\": \"B-Lab_value\",\n",
" \"22\": \"B-Mass\",\n",
" \"23\": \"B-Medication\",\n",
" \"24\": \"B-Non[biological](Detailed_description\",\n",
" \"25\": \"B-Nonbiological_location\",\n",
" \"26\": \"B-Occupation\",\n",
" \"27\": \"B-Other_entity\",\n",
" \"28\": \"B-Other_event\",\n",
" \"29\": \"B-Outcome\",\n",
" \"30\": \"B-Personal_[back](Biological_structure\",\n",
" \"31\": \"B-Personal_background\",\n",
" \"32\": \"B-Qualitative_concept\",\n",
" \"33\": \"B-Quantitative_concept\",\n",
" \"34\": \"B-Severity\",\n",
" \"35\": \"B-Sex\",\n",
" \"36\": \"B-Shape\",\n",
" \"37\": \"B-Sign_symptom\",\n",
" \"38\": \"B-Subject\",\n",
" \"39\": \"B-Texture\",\n",
" \"40\": \"B-Therapeutic_procedure\",\n",
" \"41\": \"B-Time\",\n",
" \"42\": \"B-Volume\",\n",
" \"43\": \"B-Weight\",\n",
" \"44\": \"I-Activity\",\n",
" \"45\": \"I-Administration\",\n",
" \"46\": \"I-Age\",\n",
" \"47\": \"I-Area\",\n",
" \"48\": \"I-Biological_attribute\",\n",
" \"49\": \"I-Biological_structure\",\n",
" \"50\": \"I-Clinical_event\",\n",
" \"51\": \"I-Color\",\n",
" \"52\": \"I-Coreference\",\n",
" \"53\": \"I-Date\",\n",
" \"54\": \"I-Detailed_description\",\n",
" \"55\": \"I-Diagnostic_procedure\",\n",
" \"56\": \"I-Disease_disorder\",\n",
" \"57\": \"I-Distance\",\n",
" \"58\": \"I-Dosage\",\n",
" \"59\": \"I-Duration\",\n",
" \"60\": \"I-Family_history\",\n",
" \"61\": \"I-Frequency\",\n",
" \"62\": \"I-Height\",\n",
" \"63\": \"I-History\",\n",
" \"64\": \"I-Lab_value\",\n",
" \"65\": \"I-Mass\",\n",
" \"66\": \"I-Medication\",\n",
" \"67\": \"I-Nonbiological_location\",\n",
" \"68\": \"I-Occupation\",\n",
" \"69\": \"I-Other_entity\",\n",
" \"70\": \"I-Other_event\",\n",
" \"71\": \"I-Outcome\",\n",
" \"72\": \"I-Personal_background\",\n",
" \"73\": \"I-Qualitative_concept\",\n",
" \"74\": \"I-Quantitative_concept\",\n",
" \"75\": \"I-Severity\",\n",
" \"76\": \"I-Shape\",\n",
" \"77\": \"I-Sign_symptom\",\n",
" \"78\": \"I-Subject\",\n",
" \"79\": \"I-Texture\",\n",
" \"80\": \"I-Therapeutic_procedure\",\n",
" \"81\": \"I-Time\",\n",
" \"82\": \"I-Volume\",\n",
" \"83\": \"I-Weight\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"label2id\": {\n",
" \"B-Activity\": 1,\n",
" \"B-Administration\": 2,\n",
" \"B-Age\": 3,\n",
" \"B-Area\": 4,\n",
" \"B-Biological_attribute\": 5,\n",
" \"B-Biological_structure\": 6,\n",
" \"B-Clinical_event\": 7,\n",
" \"B-Color\": 8,\n",
" \"B-Coreference\": 9,\n",
" \"B-Date\": 10,\n",
" \"B-Detailed_description\": 11,\n",
" \"B-Diagnostic_procedure\": 12,\n",
" \"B-Disease_disorder\": 13,\n",
" \"B-Distance\": 14,\n",
" \"B-Dosage\": 15,\n",
" \"B-Duration\": 16,\n",
" \"B-Family_history\": 17,\n",
" \"B-Frequency\": 18,\n",
" \"B-Height\": 19,\n",
" \"B-History\": 20,\n",
" \"B-Lab_value\": 21,\n",
" \"B-Mass\": 22,\n",
" \"B-Medication\": 23,\n",
" \"B-Non[biological](Detailed_description\": 24,\n",
" \"B-Nonbiological_location\": 25,\n",
" \"B-Occupation\": 26,\n",
" \"B-Other_entity\": 27,\n",
" \"B-Other_event\": 28,\n",
" \"B-Outcome\": 29,\n",
" \"B-Personal_[back](Biological_structure\": 30,\n",
" \"B-Personal_background\": 31,\n",
" \"B-Qualitative_concept\": 32,\n",
" \"B-Quantitative_concept\": 33,\n",
" \"B-Severity\": 34,\n",
" \"B-Sex\": 35,\n",
" \"B-Shape\": 36,\n",
" \"B-Sign_symptom\": 37,\n",
" \"B-Subject\": 38,\n",
" \"B-Texture\": 39,\n",
" \"B-Therapeutic_procedure\": 40,\n",
" \"B-Time\": 41,\n",
" \"B-Volume\": 42,\n",
" \"B-Weight\": 43,\n",
" \"I-Activity\": 44,\n",
" \"I-Administration\": 45,\n",
" \"I-Age\": 46,\n",
" \"I-Area\": 47,\n",
" \"I-Biological_attribute\": 48,\n",
" \"I-Biological_structure\": 49,\n",
" \"I-Clinical_event\": 50,\n",
" \"I-Color\": 51,\n",
" \"I-Coreference\": 52,\n",
" \"I-Date\": 53,\n",
" \"I-Detailed_description\": 54,\n",
" \"I-Diagnostic_procedure\": 55,\n",
" \"I-Disease_disorder\": 56,\n",
" \"I-Distance\": 57,\n",
" \"I-Dosage\": 58,\n",
" \"I-Duration\": 59,\n",
" \"I-Family_history\": 60,\n",
" \"I-Frequency\": 61,\n",
" \"I-Height\": 62,\n",
" \"I-History\": 63,\n",
" \"I-Lab_value\": 64,\n",
" \"I-Mass\": 65,\n",
" \"I-Medication\": 66,\n",
" \"I-Nonbiological_location\": 67,\n",
" \"I-Occupation\": 68,\n",
" \"I-Other_entity\": 69,\n",
" \"I-Other_event\": 70,\n",
" \"I-Outcome\": 71,\n",
" \"I-Personal_background\": 72,\n",
" \"I-Qualitative_concept\": 73,\n",
" \"I-Quantitative_concept\": 74,\n",
" \"I-Severity\": 75,\n",
" \"I-Shape\": 76,\n",
" \"I-Sign_symptom\": 77,\n",
" \"I-Subject\": 78,\n",
" \"I-Texture\": 79,\n",
" \"I-Therapeutic_procedure\": 80,\n",
" \"I-Time\": 81,\n",
" \"I-Volume\": 82,\n",
" \"I-Weight\": 83,\n",
" \"O\": 0\n",
" },\n",
" \"max_position_embeddings\": 512,\n",
" \"model_type\": \"distilbert\",\n",
" \"n_heads\": 12,\n",
" \"n_layers\": 6,\n",
" \"pad_token_id\": 0,\n",
" \"qa_dropout\": 0.1,\n",
" \"seq_classif_dropout\": 0.2,\n",
" \"sinusoidal_pos_embds\": false,\n",
" \"tie_weights_\": true,\n",
" \"torch_dtype\": \"float32\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"vocab_size\": 30522\n",
"}\n",
"\n",
"loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--d4data--biomedical-ner-all/snapshots/e87917020da1384aed6e93b1b46d68771f65ddab/pytorch_model.bin\n",
"All model checkpoint weights were used when initializing DistilBertForTokenClassification.\n",
"\n",
"All the weights of DistilBertForTokenClassification were initialized from the model checkpoint at d4data/biomedical-ner-all.\n",
"If your task is similar to the task the model of the checkpoint was trained on, you can already use DistilBertForTokenClassification for predictions without further training.\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"args = TrainingArguments(\n",
" \"NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased\",\n",
" evaluation_strategy = IntervalStrategy.STEPS,\n",
" eval_steps = 50,\n",
" learning_rate=5e-5,\n",
" num_train_epochs=50,\n",
" weight_decay=0.01,\n",
" metric_for_best_model = 'f1',\n",
" load_best_model_at_end=True\n",
")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Z4DTxHPoTsGs",
"outputId": "9b8c9d65-4d5a-4fb6-b91e-d41efb1ac68c"
},
"execution_count": 56,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"PyTorch: setting up devices\n",
"The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"trainer = Trainer(\n",
" model=model,\n",
" args=args,\n",
" train_dataset=tokenized_datasets[\"train\"],\n",
" eval_dataset=tokenized_datasets[\"validation\"],\n",
" data_collator=data_collator,\n",
" compute_metrics=compute_metrics,\n",
" tokenizer=tokenizer,\n",
" callbacks = [EarlyStoppingCallback(early_stopping_patience=3)]\n",
")\n",
"trainer.train()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"id": "rVfzrBZMTv6x",
"outputId": "3cdd94b4-ab62-40dc-e180-1f7a22a4ef1b"
},
"execution_count": 57,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"***** Running training *****\n",
" Num examples = 555\n",
" Num Epochs = 50\n",
" Instantaneous batch size per device = 8\n",
" Total train batch size (w. parallel, distributed & accumulation) = 8\n",
" Gradient Accumulation steps = 1\n",
" Total optimization steps = 3500\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='900' max='3500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [ 900/3500 02:39 < 07:41, 5.63 it/s, Epoch 12/50]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>Step</th>\n",
" <th>Training Loss</th>\n",
" <th>Validation Loss</th>\n",
" <th>Precision</th>\n",
" <th>Recall</th>\n",
" <th>F1</th>\n",
" <th>Accuracy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>50</td>\n",
" <td>No log</td>\n",
" <td>0.162818</td>\n",
" <td>0.270997</td>\n",
" <td>0.309661</td>\n",
" <td>0.289042</td>\n",
" <td>0.937595</td>\n",
" </tr>\n",
" <tr>\n",
" <td>100</td>\n",
" <td>No log</td>\n",
" <td>0.166674</td>\n",
" <td>0.348107</td>\n",
" <td>0.517594</td>\n",
" <td>0.416259</td>\n",
" <td>0.938953</td>\n",
" </tr>\n",
" <tr>\n",
" <td>150</td>\n",
" <td>No log</td>\n",
" <td>0.165462</td>\n",
" <td>0.376270</td>\n",
" <td>0.521433</td>\n",
" <td>0.437115</td>\n",
" <td>0.938915</td>\n",
" </tr>\n",
" <tr>\n",
" <td>200</td>\n",
" <td>No log</td>\n",
" <td>0.146949</td>\n",
" <td>0.404802</td>\n",
" <td>0.463852</td>\n",
" <td>0.432320</td>\n",
" <td>0.945101</td>\n",
" </tr>\n",
" <tr>\n",
" <td>250</td>\n",
" <td>No log</td>\n",
" <td>0.165257</td>\n",
" <td>0.440503</td>\n",
" <td>0.492642</td>\n",
" <td>0.465116</td>\n",
" <td>0.944807</td>\n",
" </tr>\n",
" <tr>\n",
" <td>300</td>\n",
" <td>No log</td>\n",
" <td>0.187354</td>\n",
" <td>0.421611</td>\n",
" <td>0.431862</td>\n",
" <td>0.426675</td>\n",
" <td>0.945729</td>\n",
" </tr>\n",
" <tr>\n",
" <td>350</td>\n",
" <td>No log</td>\n",
" <td>0.203230</td>\n",
" <td>0.445104</td>\n",
" <td>0.479846</td>\n",
" <td>0.461823</td>\n",
" <td>0.945396</td>\n",
" </tr>\n",
" <tr>\n",
" <td>400</td>\n",
" <td>No log</td>\n",
" <td>0.215578</td>\n",
" <td>0.419140</td>\n",
" <td>0.523992</td>\n",
" <td>0.465738</td>\n",
" <td>0.944269</td>\n",
" </tr>\n",
" <tr>\n",
" <td>450</td>\n",
" <td>No log</td>\n",
" <td>0.228786</td>\n",
" <td>0.456535</td>\n",
" <td>0.467051</td>\n",
" <td>0.461733</td>\n",
" <td>0.945178</td>\n",
" </tr>\n",
" <tr>\n",
" <td>500</td>\n",
" <td>0.103800</td>\n",
" <td>0.261185</td>\n",
" <td>0.427002</td>\n",
" <td>0.522073</td>\n",
" <td>0.469775</td>\n",
" <td>0.943641</td>\n",
" </tr>\n",
" <tr>\n",
" <td>550</td>\n",
" <td>0.103800</td>\n",
" <td>0.265154</td>\n",
" <td>0.424972</td>\n",
" <td>0.485605</td>\n",
" <td>0.453270</td>\n",
" <td>0.945165</td>\n",
" </tr>\n",
" <tr>\n",
" <td>600</td>\n",
" <td>0.103800</td>\n",
" <td>0.269958</td>\n",
" <td>0.383793</td>\n",
" <td>0.551504</td>\n",
" <td>0.452612</td>\n",
" <td>0.941015</td>\n",
" </tr>\n",
" <tr>\n",
" <td>650</td>\n",
" <td>0.103800</td>\n",
" <td>0.283041</td>\n",
" <td>0.449687</td>\n",
" <td>0.506078</td>\n",
" <td>0.476219</td>\n",
" <td>0.945652</td>\n",
" </tr>\n",
" <tr>\n",
" <td>700</td>\n",
" <td>0.103800</td>\n",
" <td>0.296066</td>\n",
" <td>0.423529</td>\n",
" <td>0.552783</td>\n",
" <td>0.479600</td>\n",
" <td>0.941335</td>\n",
" </tr>\n",
" <tr>\n",
" <td>750</td>\n",
" <td>0.103800</td>\n",
" <td>0.282410</td>\n",
" <td>0.444382</td>\n",
" <td>0.503519</td>\n",
" <td>0.472106</td>\n",
" <td>0.944884</td>\n",
" </tr>\n",
" <tr>\n",
" <td>800</td>\n",
" <td>0.103800</td>\n",
" <td>0.295058</td>\n",
" <td>0.400769</td>\n",
" <td>0.533589</td>\n",
" <td>0.457739</td>\n",
" <td>0.943615</td>\n",
" </tr>\n",
" <tr>\n",
" <td>850</td>\n",
" <td>0.103800</td>\n",
" <td>0.294637</td>\n",
" <td>0.432018</td>\n",
" <td>0.504159</td>\n",
" <td>0.465309</td>\n",
" <td>0.944320</td>\n",
" </tr>\n",
" <tr>\n",
" <td>900</td>\n",
" <td>0.103800</td>\n",
" <td>0.296676</td>\n",
" <td>0.411402</td>\n",
" <td>0.512476</td>\n",
" <td>0.456410</td>\n",
" <td>0.942796</td>\n",
" </tr>\n",
" </tbody>\n",
"</table><p>"
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500\n",
"Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/config.json\n",
"Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/pytorch_model.bin\n",
"tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/tokenizer_config.json\n",
"Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/special_tokens_map.json\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"\n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"Loading best model from NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500 (score: 0.4697754749568221).\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"TrainOutput(global_step=900, training_loss=0.06402364306979709, metrics={'train_runtime': 159.6722, 'train_samples_per_second': 173.794, 'train_steps_per_second': 21.92, 'total_flos': 932785215873192.0, 'train_loss': 0.06402364306979709, 'epoch': 12.86})"
]
},
"metadata": {},
"execution_count": 57
}
]
},
{
"cell_type": "code",
"source": [
"trainer.save_model('model/distilbert-base-uncased-all-tokens')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "g_r4zDTuT6n0",
"outputId": "d9cf6e0f-4d72-4dc3-bdaf-fe162acfde8f"
},
"execution_count": 58,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Saving model checkpoint to model/distilbert-base-uncased-all-tokens\n",
"Configuration saved in model/distilbert-base-uncased-all-tokens/config.json\n",
"Model weights saved in model/distilbert-base-uncased-all-tokens/pytorch_model.bin\n",
"tokenizer config file saved in model/distilbert-base-uncased-all-tokens/tokenizer_config.json\n",
"Special tokens file saved in model/distilbert-base-uncased-all-tokens/special_tokens_map.json\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Model 1 - d4data/biomedical-ner-all\n",
"\n",
"### Sentence Based Modelling"
],
"metadata": {
"id": "U9LTC6H7Ut3-"
}
},
{
"cell_type": "code",
"source": [
"dic = {\"tokens\": sent_tokenized, \"ner_tags\": label_sent_tokenized} #Use this option if you want to check the model performance with sentences tokenized by \". \" b"
],
"metadata": {
"id": "YntZAcxIUpmM"
},
"execution_count": 59,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"id": "c0p6sqVqVDhK"
},
"outputs": [],
"source": [
"dataset = Dataset.from_dict(dic)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "54fb3085-2261-496a-f6d0-60f93f654540",
"id": "dKokCRtaVDhK"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Dataset({\n",
" features: ['tokens', 'ner_tags'],\n",
" num_rows: 11668\n",
"})"
]
},
"metadata": {},
"execution_count": 61
}
],
"source": [
"dataset"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {
"id": "yaiKWzNRVDhK"
},
"outputs": [],
"source": [
"#For training, validation, and test partitions\n",
"\"\"\"\n",
"#Train, val, test partitions\n",
"train_test = dataset.train_test_split()\n",
"test_val = train_test['test'].train_test_split()\n",
"raw_datasets = DatasetDict({\n",
" 'train': train_test['train'],\n",
" 'validation': test_val['train'],\n",
" 'test': test_val['test']\n",
" })\n",
"\"\"\"\n",
"\n",
"#Just for training and validation partitions\n",
"train_test = dataset.train_test_split()\n",
"raw_datasets = DatasetDict({\n",
" 'train': train_test['train'],\n",
" 'validation': train_test['test']\n",
" })"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "3f04c836-fa6c-4f6b-9be8-1a3b77910f74",
"id": "bJryyZX2VDhL"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"DatasetDict({\n",
" train: Dataset({\n",
" features: ['tokens', 'ner_tags'],\n",
" num_rows: 8751\n",
" })\n",
" validation: Dataset({\n",
" features: ['tokens', 'ner_tags'],\n",
" num_rows: 2917\n",
" })\n",
"})"
]
},
"metadata": {},
"execution_count": 63
}
],
"source": [
"raw_datasets"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "7cb91446-41f5-4257-9d36-aac32e1a2d8b",
"id": "p9q9WmGpVDhL"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
]
},
"metadata": {},
"execution_count": 64
}
],
"source": [
"raw_datasets[\"train\"][0][\"ner_tags\"]\n",
"#raw_datasets[\"train\"][0][\"pos_tags\"]\n",
"#raw_datasets[\"train\"][0][\"chunk_tags\"]"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "0dd389dd-cb44-4235-c327-f58e5d8f2f1a",
"id": "K7Sip5njVDhL"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Dataset({\n",
" features: ['tokens', 'ner_tags'],\n",
" num_rows: 8751\n",
"})"
]
},
"metadata": {},
"execution_count": 65
}
],
"source": [
"raw_datasets['train']"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "fc667def-0a5f-481d-85c3-8ff5f80a5eb5",
"id": "BI420tEFVDhL"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['O', 'B', 'I']"
]
},
"metadata": {},
"execution_count": 66
}
],
"source": [
"label_names = ['O','B','I']\n",
"label_names"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "d11245fb-f724-440f-954e-064b90d32579",
"id": "fvbDPubIVDhL"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Since 2006 she had tried several times to lose weight , without success . \n",
"O O O O O O O O O O O O O O \n"
]
}
],
"source": [
"words = raw_datasets[\"train\"][0][\"tokens\"]\n",
"labels = [int(n) for n in raw_datasets[\"train\"][0][\"ner_tags\"]]\n",
"#labels = raw_datasets[\"train\"][0][\"pos_tags\"]\n",
"#labels = raw_datasets[\"train\"][0][\"chunk_tags\"]\n",
"line1 = \"\"\n",
"line2 = \"\"\n",
"for word, label in zip(words, labels):\n",
" full_label = label_names[label]\n",
" max_length = max(len(word), len(full_label))\n",
" line1 += word + \" \" * (max_length - len(word) + 1)\n",
" line2 += full_label + \" \" * (max_length - len(full_label) + 1)\n",
"\n",
"print(line1)\n",
"print(line2)"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "9d061e6c-4cd9-472d-b873-814d709efb63",
"id": "LXmlD43QVDhL"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"loading file vocab.txt from cache at /root/.cache/huggingface/hub/models--d4data--biomedical-ner-all/snapshots/e87917020da1384aed6e93b1b46d68771f65ddab/vocab.txt\n",
"loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--d4data--biomedical-ner-all/snapshots/e87917020da1384aed6e93b1b46d68771f65ddab/tokenizer.json\n",
"loading file added_tokens.json from cache at None\n",
"loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--d4data--biomedical-ner-all/snapshots/e87917020da1384aed6e93b1b46d68771f65ddab/special_tokens_map.json\n",
"loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--d4data--biomedical-ner-all/snapshots/e87917020da1384aed6e93b1b46d68771f65ddab/tokenizer_config.json\n"
]
}
],
"source": [
"model_checkpoint = \"d4data/biomedical-ner-all\"\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, add_prefix_space=True)"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e11cebbd-2fd8-4db3-f014-d52f80ae104c",
"id": "rUn3zUd9VDhM"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 69
}
],
"source": [
"tokenizer.is_fast"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "0196fde7-250b-4486-f63f-2dcea8848e5d",
"id": "LXxYSaDLVDhM"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['[CLS]',\n",
" 'since',\n",
" '2006',\n",
" 'she',\n",
" 'had',\n",
" 'tried',\n",
" 'several',\n",
" 'times',\n",
" 'to',\n",
" 'lose',\n",
" 'weight',\n",
" ',',\n",
" 'without',\n",
" 'success',\n",
" '.',\n",
" '[SEP]']"
]
},
"metadata": {},
"execution_count": 70
}
],
"source": [
"inputs = tokenizer(raw_datasets[\"train\"][0][\"tokens\"], is_split_into_words=True)\n",
"inputs.tokens()"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "48b11a85-7ca4-4589-dcba-21833e5b6c55",
"id": "-bG1VI2NVDhM"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"[-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -100]\n"
]
}
],
"source": [
"labels = raw_datasets[\"train\"][0][\"ner_tags\"]\n",
"word_ids = inputs.word_ids()\n",
"print(labels)\n",
"print(align_labels_with_tokens(labels, word_ids))"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 81,
"referenced_widgets": [
"40c2b37fa07f44648cecc9b7e406e7e2",
"69ed5fe8ed6046acb4202689c065f858",
"bbd9cf7a77aa48fda3a648583ed02b08",
"ebe8b2b35e884fd28bb42eacf01ff07c",
"ddb14bc1d5d4437a9ee4a895846e7d29",
"669c24c6309f46cbbdcd0c764143e74f",
"d1e4665beafa4bbeb25d0e9e8447a5a9",
"09217bdc1e2145eb84cc97207595e6f0",
"b294af01ac5f483dacbe2e1c40fdf223",
"c1e27e9184204d618ce59b97f7302335",
"2e63f2af443d448aaaddf81127def048",
"9239cc2fd1d94d86986b7f395de70fca",
"f1e8d31b67db4089ab1b036bda341617",
"33be40ebcab54ff68855f1145cf5e1d6",
"d96c111f09d74a0c9816328f88d9e45b",
"3907dc2aaa484877aee9beab8a6888d4",
"aaacfb0f3bd1427ea44ec84c28a2aaf7",
"cb92e843491142e8a2a4008223a90d02",
"52169f264141463e94a7761a4ffb3f7a",
"e509b790873740b59aa2f52875ca2038",
"a395318bce7348d78ca83a308552f042",
"a2bb171f700743559e1d2c472c8289ef"
]
},
"outputId": "aad10f99-e5fa-423c-f259-a12310b18d1d",
"id": "0OVZq1BtVDhM"
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
" 0%| | 0/9 [00:00<?, ?ba/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "40c2b37fa07f44648cecc9b7e406e7e2"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
" 0%| | 0/3 [00:00<?, ?ba/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "9239cc2fd1d94d86986b7f395de70fca"
}
},
"metadata": {}
}
],
"source": [
"tokenized_datasets = raw_datasets.map(\n",
" tokenize_and_align_labels,\n",
" batched=True,\n",
" remove_columns=raw_datasets[\"train\"].column_names,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {
"id": "rdl_EpWuVDhN"
},
"outputs": [],
"source": [
"data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "d2768d3a-b399-4236-a9c4-cfff18f31d1c",
"id": "gY7K46A2VDhN"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"tensor([[-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n",
" -100, -100, -100, -100, -100, -100, -100, -100, -100, -100],\n",
" [-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 1, 2, 2, 2, 2, 2, 2, 2, 0, -100]])"
]
},
"metadata": {},
"execution_count": 74
}
],
"source": [
"batch = data_collator([tokenized_datasets[\"train\"][i] for i in range(2)])\n",
"batch[\"labels\"]"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {
"id": "PNC5_yuKVDhN"
},
"outputs": [],
"source": [
"id2label = {str(i): label for i, label in enumerate(label_names)}\n",
"label2id = {v: k for k, v in id2label.items()}"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "ba1aa394-ff7a-471e-a8dc-d35755f501ed",
"id": "AbRkJHvbVDhN"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--d4data--biomedical-ner-all/snapshots/e87917020da1384aed6e93b1b46d68771f65ddab/config.json\n",
"Model config DistilBertConfig {\n",
" \"_name_or_path\": \"d4data/biomedical-ner-all\",\n",
" \"activation\": \"gelu\",\n",
" \"architectures\": [\n",
" \"DistilBertForTokenClassification\"\n",
" ],\n",
" \"attention_dropout\": 0.1,\n",
" \"dim\": 768,\n",
" \"dropout\": 0.1,\n",
" \"hidden_dim\": 3072,\n",
" \"id2label\": {\n",
" \"0\": \"O\",\n",
" \"1\": \"B-Activity\",\n",
" \"2\": \"B-Administration\",\n",
" \"3\": \"B-Age\",\n",
" \"4\": \"B-Area\",\n",
" \"5\": \"B-Biological_attribute\",\n",
" \"6\": \"B-Biological_structure\",\n",
" \"7\": \"B-Clinical_event\",\n",
" \"8\": \"B-Color\",\n",
" \"9\": \"B-Coreference\",\n",
" \"10\": \"B-Date\",\n",
" \"11\": \"B-Detailed_description\",\n",
" \"12\": \"B-Diagnostic_procedure\",\n",
" \"13\": \"B-Disease_disorder\",\n",
" \"14\": \"B-Distance\",\n",
" \"15\": \"B-Dosage\",\n",
" \"16\": \"B-Duration\",\n",
" \"17\": \"B-Family_history\",\n",
" \"18\": \"B-Frequency\",\n",
" \"19\": \"B-Height\",\n",
" \"20\": \"B-History\",\n",
" \"21\": \"B-Lab_value\",\n",
" \"22\": \"B-Mass\",\n",
" \"23\": \"B-Medication\",\n",
" \"24\": \"B-Non[biological](Detailed_description\",\n",
" \"25\": \"B-Nonbiological_location\",\n",
" \"26\": \"B-Occupation\",\n",
" \"27\": \"B-Other_entity\",\n",
" \"28\": \"B-Other_event\",\n",
" \"29\": \"B-Outcome\",\n",
" \"30\": \"B-Personal_[back](Biological_structure\",\n",
" \"31\": \"B-Personal_background\",\n",
" \"32\": \"B-Qualitative_concept\",\n",
" \"33\": \"B-Quantitative_concept\",\n",
" \"34\": \"B-Severity\",\n",
" \"35\": \"B-Sex\",\n",
" \"36\": \"B-Shape\",\n",
" \"37\": \"B-Sign_symptom\",\n",
" \"38\": \"B-Subject\",\n",
" \"39\": \"B-Texture\",\n",
" \"40\": \"B-Therapeutic_procedure\",\n",
" \"41\": \"B-Time\",\n",
" \"42\": \"B-Volume\",\n",
" \"43\": \"B-Weight\",\n",
" \"44\": \"I-Activity\",\n",
" \"45\": \"I-Administration\",\n",
" \"46\": \"I-Age\",\n",
" \"47\": \"I-Area\",\n",
" \"48\": \"I-Biological_attribute\",\n",
" \"49\": \"I-Biological_structure\",\n",
" \"50\": \"I-Clinical_event\",\n",
" \"51\": \"I-Color\",\n",
" \"52\": \"I-Coreference\",\n",
" \"53\": \"I-Date\",\n",
" \"54\": \"I-Detailed_description\",\n",
" \"55\": \"I-Diagnostic_procedure\",\n",
" \"56\": \"I-Disease_disorder\",\n",
" \"57\": \"I-Distance\",\n",
" \"58\": \"I-Dosage\",\n",
" \"59\": \"I-Duration\",\n",
" \"60\": \"I-Family_history\",\n",
" \"61\": \"I-Frequency\",\n",
" \"62\": \"I-Height\",\n",
" \"63\": \"I-History\",\n",
" \"64\": \"I-Lab_value\",\n",
" \"65\": \"I-Mass\",\n",
" \"66\": \"I-Medication\",\n",
" \"67\": \"I-Nonbiological_location\",\n",
" \"68\": \"I-Occupation\",\n",
" \"69\": \"I-Other_entity\",\n",
" \"70\": \"I-Other_event\",\n",
" \"71\": \"I-Outcome\",\n",
" \"72\": \"I-Personal_background\",\n",
" \"73\": \"I-Qualitative_concept\",\n",
" \"74\": \"I-Quantitative_concept\",\n",
" \"75\": \"I-Severity\",\n",
" \"76\": \"I-Shape\",\n",
" \"77\": \"I-Sign_symptom\",\n",
" \"78\": \"I-Subject\",\n",
" \"79\": \"I-Texture\",\n",
" \"80\": \"I-Therapeutic_procedure\",\n",
" \"81\": \"I-Time\",\n",
" \"82\": \"I-Volume\",\n",
" \"83\": \"I-Weight\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"label2id\": {\n",
" \"B-Activity\": 1,\n",
" \"B-Administration\": 2,\n",
" \"B-Age\": 3,\n",
" \"B-Area\": 4,\n",
" \"B-Biological_attribute\": 5,\n",
" \"B-Biological_structure\": 6,\n",
" \"B-Clinical_event\": 7,\n",
" \"B-Color\": 8,\n",
" \"B-Coreference\": 9,\n",
" \"B-Date\": 10,\n",
" \"B-Detailed_description\": 11,\n",
" \"B-Diagnostic_procedure\": 12,\n",
" \"B-Disease_disorder\": 13,\n",
" \"B-Distance\": 14,\n",
" \"B-Dosage\": 15,\n",
" \"B-Duration\": 16,\n",
" \"B-Family_history\": 17,\n",
" \"B-Frequency\": 18,\n",
" \"B-Height\": 19,\n",
" \"B-History\": 20,\n",
" \"B-Lab_value\": 21,\n",
" \"B-Mass\": 22,\n",
" \"B-Medication\": 23,\n",
" \"B-Non[biological](Detailed_description\": 24,\n",
" \"B-Nonbiological_location\": 25,\n",
" \"B-Occupation\": 26,\n",
" \"B-Other_entity\": 27,\n",
" \"B-Other_event\": 28,\n",
" \"B-Outcome\": 29,\n",
" \"B-Personal_[back](Biological_structure\": 30,\n",
" \"B-Personal_background\": 31,\n",
" \"B-Qualitative_concept\": 32,\n",
" \"B-Quantitative_concept\": 33,\n",
" \"B-Severity\": 34,\n",
" \"B-Sex\": 35,\n",
" \"B-Shape\": 36,\n",
" \"B-Sign_symptom\": 37,\n",
" \"B-Subject\": 38,\n",
" \"B-Texture\": 39,\n",
" \"B-Therapeutic_procedure\": 40,\n",
" \"B-Time\": 41,\n",
" \"B-Volume\": 42,\n",
" \"B-Weight\": 43,\n",
" \"I-Activity\": 44,\n",
" \"I-Administration\": 45,\n",
" \"I-Age\": 46,\n",
" \"I-Area\": 47,\n",
" \"I-Biological_attribute\": 48,\n",
" \"I-Biological_structure\": 49,\n",
" \"I-Clinical_event\": 50,\n",
" \"I-Color\": 51,\n",
" \"I-Coreference\": 52,\n",
" \"I-Date\": 53,\n",
" \"I-Detailed_description\": 54,\n",
" \"I-Diagnostic_procedure\": 55,\n",
" \"I-Disease_disorder\": 56,\n",
" \"I-Distance\": 57,\n",
" \"I-Dosage\": 58,\n",
" \"I-Duration\": 59,\n",
" \"I-Family_history\": 60,\n",
" \"I-Frequency\": 61,\n",
" \"I-Height\": 62,\n",
" \"I-History\": 63,\n",
" \"I-Lab_value\": 64,\n",
" \"I-Mass\": 65,\n",
" \"I-Medication\": 66,\n",
" \"I-Nonbiological_location\": 67,\n",
" \"I-Occupation\": 68,\n",
" \"I-Other_entity\": 69,\n",
" \"I-Other_event\": 70,\n",
" \"I-Outcome\": 71,\n",
" \"I-Personal_background\": 72,\n",
" \"I-Qualitative_concept\": 73,\n",
" \"I-Quantitative_concept\": 74,\n",
" \"I-Severity\": 75,\n",
" \"I-Shape\": 76,\n",
" \"I-Sign_symptom\": 77,\n",
" \"I-Subject\": 78,\n",
" \"I-Texture\": 79,\n",
" \"I-Therapeutic_procedure\": 80,\n",
" \"I-Time\": 81,\n",
" \"I-Volume\": 82,\n",
" \"I-Weight\": 83,\n",
" \"O\": 0\n",
" },\n",
" \"max_position_embeddings\": 512,\n",
" \"model_type\": \"distilbert\",\n",
" \"n_heads\": 12,\n",
" \"n_layers\": 6,\n",
" \"pad_token_id\": 0,\n",
" \"qa_dropout\": 0.1,\n",
" \"seq_classif_dropout\": 0.2,\n",
" \"sinusoidal_pos_embds\": false,\n",
" \"tie_weights_\": true,\n",
" \"torch_dtype\": \"float32\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"vocab_size\": 30522\n",
"}\n",
"\n",
"loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--d4data--biomedical-ner-all/snapshots/e87917020da1384aed6e93b1b46d68771f65ddab/pytorch_model.bin\n",
"All model checkpoint weights were used when initializing DistilBertForTokenClassification.\n",
"\n",
"All the weights of DistilBertForTokenClassification were initialized from the model checkpoint at d4data/biomedical-ner-all.\n",
"If your task is similar to the task the model of the checkpoint was trained on, you can already use DistilBertForTokenClassification for predictions without further training.\n"
]
}
],
"source": [
"model = AutoModelForTokenClassification.from_pretrained( \n",
" model_checkpoint\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "MQILaROUVDhN",
"outputId": "a548d6da-abe4-417a-99eb-f65d2c5b3273"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"PyTorch: setting up devices\n",
"The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
]
}
],
"source": [
"args = TrainingArguments(\n",
" \"NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased\",\n",
" evaluation_strategy = IntervalStrategy.STEPS,\n",
" eval_steps = 50,\n",
" learning_rate=5e-5,\n",
" num_train_epochs=50,\n",
" weight_decay=0.01,\n",
" metric_for_best_model = 'f1',\n",
" load_best_model_at_end=True\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "cb0a12c3-0ce4-4d59-d773-f2f31ff05b68",
"id": "MRJatT86VDhN"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:310: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" FutureWarning,\n",
"***** Running training *****\n",
" Num examples = 8751\n",
" Num Epochs = 50\n",
" Instantaneous batch size per device = 8\n",
" Total train batch size (w. parallel, distributed & accumulation) = 8\n",
" Gradient Accumulation steps = 1\n",
" Total optimization steps = 54700\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='1650' max='54700' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [ 1650/54700 04:10 < 2:14:23, 6.58 it/s, Epoch 1/50]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>Step</th>\n",
" <th>Training Loss</th>\n",
" <th>Validation Loss</th>\n",
" <th>Precision</th>\n",
" <th>Recall</th>\n",
" <th>F1</th>\n",
" <th>Accuracy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>50</td>\n",
" <td>No log</td>\n",
" <td>0.220473</td>\n",
" <td>0.223507</td>\n",
" <td>0.240041</td>\n",
" <td>0.231479</td>\n",
" <td>0.923262</td>\n",
" </tr>\n",
" <tr>\n",
" <td>100</td>\n",
" <td>No log</td>\n",
" <td>0.178907</td>\n",
" <td>0.233188</td>\n",
" <td>0.249353</td>\n",
" <td>0.241000</td>\n",
" <td>0.932774</td>\n",
" </tr>\n",
" <tr>\n",
" <td>150</td>\n",
" <td>No log</td>\n",
" <td>0.170712</td>\n",
" <td>0.209794</td>\n",
" <td>0.210554</td>\n",
" <td>0.210173</td>\n",
" <td>0.934965</td>\n",
" </tr>\n",
" <tr>\n",
" <td>200</td>\n",
" <td>No log</td>\n",
" <td>0.161026</td>\n",
" <td>0.308015</td>\n",
" <td>0.341956</td>\n",
" <td>0.324099</td>\n",
" <td>0.937756</td>\n",
" </tr>\n",
" <tr>\n",
" <td>250</td>\n",
" <td>No log</td>\n",
" <td>0.167800</td>\n",
" <td>0.290117</td>\n",
" <td>0.282462</td>\n",
" <td>0.286239</td>\n",
" <td>0.940074</td>\n",
" </tr>\n",
" <tr>\n",
" <td>300</td>\n",
" <td>No log</td>\n",
" <td>0.179418</td>\n",
" <td>0.388704</td>\n",
" <td>0.302638</td>\n",
" <td>0.340314</td>\n",
" <td>0.939863</td>\n",
" </tr>\n",
" <tr>\n",
" <td>350</td>\n",
" <td>No log</td>\n",
" <td>0.160154</td>\n",
" <td>0.333333</td>\n",
" <td>0.443870</td>\n",
" <td>0.380741</td>\n",
" <td>0.938609</td>\n",
" </tr>\n",
" <tr>\n",
" <td>400</td>\n",
" <td>No log</td>\n",
" <td>0.199905</td>\n",
" <td>0.207132</td>\n",
" <td>0.204346</td>\n",
" <td>0.205729</td>\n",
" <td>0.933585</td>\n",
" </tr>\n",
" <tr>\n",
" <td>450</td>\n",
" <td>No log</td>\n",
" <td>0.154181</td>\n",
" <td>0.349614</td>\n",
" <td>0.422142</td>\n",
" <td>0.382470</td>\n",
" <td>0.940611</td>\n",
" </tr>\n",
" <tr>\n",
" <td>500</td>\n",
" <td>0.213900</td>\n",
" <td>0.154374</td>\n",
" <td>0.376731</td>\n",
" <td>0.422142</td>\n",
" <td>0.398146</td>\n",
" <td>0.941137</td>\n",
" </tr>\n",
" <tr>\n",
" <td>550</td>\n",
" <td>0.213900</td>\n",
" <td>0.154931</td>\n",
" <td>0.426748</td>\n",
" <td>0.432488</td>\n",
" <td>0.429599</td>\n",
" <td>0.943276</td>\n",
" </tr>\n",
" <tr>\n",
" <td>600</td>\n",
" <td>0.213900</td>\n",
" <td>0.147228</td>\n",
" <td>0.402466</td>\n",
" <td>0.472840</td>\n",
" <td>0.434824</td>\n",
" <td>0.942075</td>\n",
" </tr>\n",
" <tr>\n",
" <td>650</td>\n",
" <td>0.213900</td>\n",
" <td>0.154059</td>\n",
" <td>0.274133</td>\n",
" <td>0.265908</td>\n",
" <td>0.269958</td>\n",
" <td>0.939326</td>\n",
" </tr>\n",
" <tr>\n",
" <td>700</td>\n",
" <td>0.213900</td>\n",
" <td>0.158030</td>\n",
" <td>0.389341</td>\n",
" <td>0.544232</td>\n",
" <td>0.453937</td>\n",
" <td>0.940095</td>\n",
" </tr>\n",
" <tr>\n",
" <td>750</td>\n",
" <td>0.213900</td>\n",
" <td>0.137712</td>\n",
" <td>0.373972</td>\n",
" <td>0.399897</td>\n",
" <td>0.386500</td>\n",
" <td>0.946952</td>\n",
" </tr>\n",
" <tr>\n",
" <td>800</td>\n",
" <td>0.213900</td>\n",
" <td>0.145364</td>\n",
" <td>0.385280</td>\n",
" <td>0.530781</td>\n",
" <td>0.446475</td>\n",
" <td>0.941485</td>\n",
" </tr>\n",
" <tr>\n",
" <td>850</td>\n",
" <td>0.213900</td>\n",
" <td>0.148491</td>\n",
" <td>0.401239</td>\n",
" <td>0.535954</td>\n",
" <td>0.458915</td>\n",
" <td>0.940895</td>\n",
" </tr>\n",
" <tr>\n",
" <td>900</td>\n",
" <td>0.213900</td>\n",
" <td>0.149123</td>\n",
" <td>0.374374</td>\n",
" <td>0.464046</td>\n",
" <td>0.414414</td>\n",
" <td>0.946784</td>\n",
" </tr>\n",
" <tr>\n",
" <td>950</td>\n",
" <td>0.213900</td>\n",
" <td>0.142598</td>\n",
" <td>0.405747</td>\n",
" <td>0.365235</td>\n",
" <td>0.384427</td>\n",
" <td>0.946963</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1000</td>\n",
" <td>0.153200</td>\n",
" <td>0.143212</td>\n",
" <td>0.449354</td>\n",
" <td>0.341956</td>\n",
" <td>0.388367</td>\n",
" <td>0.945857</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1050</td>\n",
" <td>0.153200</td>\n",
" <td>0.146789</td>\n",
" <td>0.435653</td>\n",
" <td>0.495603</td>\n",
" <td>0.463698</td>\n",
" <td>0.947300</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1100</td>\n",
" <td>0.153200</td>\n",
" <td>0.137099</td>\n",
" <td>0.421190</td>\n",
" <td>0.501811</td>\n",
" <td>0.457979</td>\n",
" <td>0.948090</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1150</td>\n",
" <td>0.153200</td>\n",
" <td>0.144017</td>\n",
" <td>0.437966</td>\n",
" <td>0.485773</td>\n",
" <td>0.460633</td>\n",
" <td>0.948532</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1200</td>\n",
" <td>0.153200</td>\n",
" <td>0.142823</td>\n",
" <td>0.459889</td>\n",
" <td>0.344025</td>\n",
" <td>0.393608</td>\n",
" <td>0.947416</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1250</td>\n",
" <td>0.153200</td>\n",
" <td>0.163925</td>\n",
" <td>0.380834</td>\n",
" <td>0.476979</td>\n",
" <td>0.423519</td>\n",
" <td>0.947732</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1300</td>\n",
" <td>0.153200</td>\n",
" <td>0.153968</td>\n",
" <td>0.461087</td>\n",
" <td>0.447491</td>\n",
" <td>0.454187</td>\n",
" <td>0.948711</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1350</td>\n",
" <td>0.153200</td>\n",
" <td>0.157536</td>\n",
" <td>0.465220</td>\n",
" <td>0.432488</td>\n",
" <td>0.448257</td>\n",
" <td>0.949680</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1400</td>\n",
" <td>0.153200</td>\n",
" <td>0.149706</td>\n",
" <td>0.421693</td>\n",
" <td>0.394206</td>\n",
" <td>0.407487</td>\n",
" <td>0.947236</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1450</td>\n",
" <td>0.153200</td>\n",
" <td>0.178025</td>\n",
" <td>0.472966</td>\n",
" <td>0.448008</td>\n",
" <td>0.460149</td>\n",
" <td>0.947447</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1500</td>\n",
" <td>0.113500</td>\n",
" <td>0.146669</td>\n",
" <td>0.440809</td>\n",
" <td>0.529747</td>\n",
" <td>0.481203</td>\n",
" <td>0.947689</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1550</td>\n",
" <td>0.113500</td>\n",
" <td>0.144341</td>\n",
" <td>0.480186</td>\n",
" <td>0.426280</td>\n",
" <td>0.451631</td>\n",
" <td>0.950344</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1600</td>\n",
" <td>0.113500</td>\n",
" <td>0.140071</td>\n",
" <td>0.446570</td>\n",
" <td>0.495085</td>\n",
" <td>0.469578</td>\n",
" <td>0.948690</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1650</td>\n",
" <td>0.113500</td>\n",
" <td>0.175828</td>\n",
" <td>0.432393</td>\n",
" <td>0.309364</td>\n",
" <td>0.360676</td>\n",
" <td>0.945825</td>\n",
" </tr>\n",
" </tbody>\n",
"</table><p>"
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500\n",
"Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/config.json\n",
"Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/pytorch_model.bin\n",
"tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/tokenizer_config.json\n",
"Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/special_tokens_map.json\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000\n",
"Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/config.json\n",
"Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/pytorch_model.bin\n",
"tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/tokenizer_config.json\n",
"Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/special_tokens_map.json\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500\n",
"Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500/config.json\n",
"Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500/pytorch_model.bin\n",
"tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500/tokenizer_config.json\n",
"Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500/special_tokens_map.json\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"\n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"Loading best model from NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500 (score: 0.48120300751879697).\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"TrainOutput(global_step=1650, training_loss=0.15572210947672527, metrics={'train_runtime': 250.5398, 'train_samples_per_second': 1746.429, 'train_steps_per_second': 218.329, 'total_flos': 244690187133744.0, 'train_loss': 0.15572210947672527, 'epoch': 1.51})"
]
},
"metadata": {},
"execution_count": 78
}
],
"source": [
"trainer = Trainer(\n",
" model=model,\n",
" args=args,\n",
" train_dataset=tokenized_datasets[\"train\"],\n",
" eval_dataset=tokenized_datasets[\"validation\"],\n",
" data_collator=data_collator,\n",
" compute_metrics=compute_metrics,\n",
" tokenizer=tokenizer,\n",
" callbacks = [EarlyStoppingCallback(early_stopping_patience=3)]\n",
")\n",
"trainer.train()"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "cb8d8bbc-fe8e-4926-d201-8a64dc0edcdb",
"id": "P2GfgJz0VDhO"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Saving model checkpoint to model/distilbert-base-uncased-sentence\n",
"Configuration saved in model/distilbert-base-uncased-sentence/config.json\n",
"Model weights saved in model/distilbert-base-uncased-sentence/pytorch_model.bin\n",
"tokenizer config file saved in model/distilbert-base-uncased-sentence/tokenizer_config.json\n",
"Special tokens file saved in model/distilbert-base-uncased-sentence/special_tokens_map.json\n"
]
}
],
"source": [
"trainer.save_model('model/distilbert-base-uncased-sentence')"
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "lzL_QhxW7Dha"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Model 2 - pucpr/clinicalnerpt-medical\n",
"\n",
"Whole document based tokenization"
],
"metadata": {
"id": "GgGucecD600w"
}
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000,
"referenced_widgets": [
"f0fc94c6df4c432f9e1edcfceaf44edd",
"03f4216f904d4bf6a8e461c50f40378f",
"76fecb01ad6441a08ad0ddb989a8ab80",
"783edab3150d40a3bf99ed910cdbaf88",
"e33897e8fa4841308c55b19352876ab1",
"909d7d349769402a8c20b02b35eafbd9",
"6139e3d551044671a79e15e8adf351ab",
"1c19ec799ccb4e788f34ed8ac37d495f",
"7a49ac5f18f64b41825378184f8c32ec",
"76ad2415389f41deb4f2810bca56b753",
"8c4bb02e55fa48429c8f86dac5cb369e",
"980374f604ec4970b0afa70d108c864b",
"baa9e8a9169a45efb8c117fdf4ea45b7",
"aa248b7d4eae4e5d965a7d04144adacc",
"311d8199627e4c4f83d02c167b5755f3",
"a299d926edbb4c51906b1be8f694d074",
"c76ebf5d6c6c46cd9d14591a47725ae1",
"178555439d854d30a01cac053adf9079",
"00c8d81110fe4f4bbcf77be4d20581c3",
"43c18ef010014cb395f045dd26497fba",
"3b83d1f429d34e8e8de6ddff555df02d",
"91a8348ac2194686a9ef075f7d49687d",
"42dcc74bff5440608a3e9f2fa580cd3c",
"616c54d0cd534047b93b215e7baf2ba3",
"846ce38d6fb84279a7419091f2d269b0",
"1e15050772b54e34b014a98b9710c783",
"951f476862ea49619100e202a6e742f6",
"f6e25547ba664cd59128536944a926fd",
"53185a2afedc41e0a680d5007656b90b",
"860f8204efae42d6ad2fa4eb9e661810",
"e61ec283a9c04cc696d17bbe24ccf460",
"15abc268777a4e3cb5c3c7f430745c1d",
"e6c1266e8b074bdfafa0db6208743a07",
"01a12f499b7942cc90f2032a8f3284e9",
"804bf9b8a2154399a05dd0860f4dfd89",
"66fca4d7946240c3b08ba51fac82f2ae",
"6cefb3be5be9488ca033ed9908c6a8f5",
"2d0f2804db004da0914e4733ce96b749",
"3d6b93cde5254ce99f19802b7c1146f4",
"838416bfbee0400299abff324c4825bc",
"f7584b77d97f4d48aa5b50bae2df49f8",
"0a5cd97f8a914ff89fd27aed7b38164d",
"ad4c308ebd574e909d31b161580b9064",
"74030c98fcb942ed9d9ffc43799113f0"
]
},
"id": "axyP0XOOKpLg",
"outputId": "da8d34ae-66c7-459a-99be-56d7a434ec80"
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading: 0%| | 0.00/151 [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "f0fc94c6df4c432f9e1edcfceaf44edd"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading: 0%| | 0.00/1.05k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "980374f604ec4970b0afa70d108c864b"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/config.json\n",
"Model config BertConfig {\n",
" \"_name_or_path\": \"pucpr/clinicalnerpt-medical\",\n",
" \"_num_labels\": 3,\n",
" \"architectures\": [\n",
" \"BertForTokenClassification\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"classifier_dropout\": null,\n",
" \"directionality\": \"bidi\",\n",
" \"eos_token_ids\": null,\n",
" \"gradient_checkpointing\": false,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"O\",\n",
" \"1\": \"B-MedicalDevice\",\n",
" \"2\": \"I-MedicalDevice\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"B-MedicalDevice\": 1,\n",
" \"I-MedicalDevice\": 2,\n",
" \"O\": 0\n",
" },\n",
" \"layer_norm_eps\": 1e-12,\n",
" \"max_position_embeddings\": 512,\n",
" \"model_type\": \"bert\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": null,\n",
" \"pooler_fc_size\": 768,\n",
" \"pooler_num_attention_heads\": 12,\n",
" \"pooler_num_fc_layers\": 3,\n",
" \"pooler_size_per_head\": 128,\n",
" \"pooler_type\": \"first_token_transform\",\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 2,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 119547\n",
"}\n",
"\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading: 0%| | 0.00/996k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "42dcc74bff5440608a3e9f2fa580cd3c"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading: 0%| | 0.00/112 [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "01a12f499b7942cc90f2032a8f3284e9"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"loading file vocab.txt from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/vocab.txt\n",
"loading file tokenizer.json from cache at None\n",
"loading file added_tokens.json from cache at None\n",
"loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/special_tokens_map.json\n",
"loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/tokenizer_config.json\n",
"loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/config.json\n",
"Model config BertConfig {\n",
" \"_name_or_path\": \"pucpr/clinicalnerpt-medical\",\n",
" \"_num_labels\": 3,\n",
" \"architectures\": [\n",
" \"BertForTokenClassification\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"classifier_dropout\": null,\n",
" \"directionality\": \"bidi\",\n",
" \"eos_token_ids\": null,\n",
" \"gradient_checkpointing\": false,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"O\",\n",
" \"1\": \"B-MedicalDevice\",\n",
" \"2\": \"I-MedicalDevice\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"B-MedicalDevice\": 1,\n",
" \"I-MedicalDevice\": 2,\n",
" \"O\": 0\n",
" },\n",
" \"layer_norm_eps\": 1e-12,\n",
" \"max_position_embeddings\": 512,\n",
" \"model_type\": \"bert\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": null,\n",
" \"pooler_fc_size\": 768,\n",
" \"pooler_num_attention_heads\": 12,\n",
" \"pooler_num_fc_layers\": 3,\n",
" \"pooler_size_per_head\": 128,\n",
" \"pooler_type\": \"first_token_transform\",\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 2,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 119547\n",
"}\n",
"\n",
"loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/config.json\n",
"Model config BertConfig {\n",
" \"_name_or_path\": \"pucpr/clinicalnerpt-medical\",\n",
" \"_num_labels\": 3,\n",
" \"architectures\": [\n",
" \"BertForTokenClassification\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"classifier_dropout\": null,\n",
" \"directionality\": \"bidi\",\n",
" \"eos_token_ids\": null,\n",
" \"gradient_checkpointing\": false,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"O\",\n",
" \"1\": \"B-MedicalDevice\",\n",
" \"2\": \"I-MedicalDevice\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"B-MedicalDevice\": 1,\n",
" \"I-MedicalDevice\": 2,\n",
" \"O\": 0\n",
" },\n",
" \"layer_norm_eps\": 1e-12,\n",
" \"max_position_embeddings\": 512,\n",
" \"model_type\": \"bert\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": null,\n",
" \"pooler_fc_size\": 768,\n",
" \"pooler_num_attention_heads\": 12,\n",
" \"pooler_num_fc_layers\": 3,\n",
" \"pooler_size_per_head\": 128,\n",
" \"pooler_type\": \"first_token_transform\",\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 2,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 119547\n",
"}\n",
"\n"
]
}
],
"source": [
"model_checkpoint = \"pucpr/clinicalnerpt-medical\"\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, add_prefix_space=True)"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "JPTd6vUPKpLg",
"outputId": "84451d5e-eb3a-4763-dc29-5ffebafc4c3d"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 81
}
],
"source": [
"tokenizer.is_fast"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {
"id": "q8o2kvS69K2h"
},
"outputs": [],
"source": [
"dic = {\"tokens\": HCs_tokenized, \"ner_tags\": labels_tokenized} #For the whole clinical case. We used this option for our paper.\n",
"#dic = {\"tokens\": sent_tokenized, \"ner_tags\": label_sent_tokenized} #Use this option if you want to check the model performance with sentences tokenized by \". \" b"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {
"id": "6JXCzYF49K2h"
},
"outputs": [],
"source": [
"dataset = Dataset.from_dict(dic)"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "f01f435d-ced1-4002-cc23-05e42329c8df",
"id": "AGI1Hf_E9K2h"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Dataset({\n",
" features: ['tokens', 'ner_tags'],\n",
" num_rows: 741\n",
"})"
]
},
"metadata": {},
"execution_count": 84
}
],
"source": [
"dataset"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {
"id": "CUH94L-29K2h"
},
"outputs": [],
"source": [
"#For training, validation, and test partitions\n",
"\"\"\"\n",
"#Train, val, test partitions\n",
"train_test = dataset.train_test_split()\n",
"test_val = train_test['test'].train_test_split()\n",
"raw_datasets = DatasetDict({\n",
" 'train': train_test['train'],\n",
" 'validation': test_val['train'],\n",
" 'test': test_val['test']\n",
" })\n",
"\"\"\"\n",
"\n",
"#Just for training and validation partitions\n",
"train_test = dataset.train_test_split()\n",
"raw_datasets = DatasetDict({\n",
" 'train': train_test['train'],\n",
" 'validation': train_test['test']\n",
" })"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "654a2798-e4f3-49ce-c59c-747c7538352b",
"id": "zRYC70NF9K2i"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"DatasetDict({\n",
" train: Dataset({\n",
" features: ['tokens', 'ner_tags'],\n",
" num_rows: 555\n",
" })\n",
" validation: Dataset({\n",
" features: ['tokens', 'ner_tags'],\n",
" num_rows: 186\n",
" })\n",
"})"
]
},
"metadata": {},
"execution_count": 86
}
],
"source": [
"raw_datasets"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e1de590a-ea92-4b0b-9da3-c65152d08872",
"id": "dCZcYkCa9K2i"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 2,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 1,\n",
" 2,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 2,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 2,\n",
" 2,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 2,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 2,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 2,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0]"
]
},
"metadata": {},
"execution_count": 87
}
],
"source": [
"raw_datasets[\"train\"][0][\"ner_tags\"]\n",
"#raw_datasets[\"train\"][0][\"pos_tags\"]\n",
"#raw_datasets[\"train\"][0][\"chunk_tags\"]"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "6b0070ab-8b15-4750-e848-a122ca47e050",
"id": "wWinDHdc9K2i"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Dataset({\n",
" features: ['tokens', 'ner_tags'],\n",
" num_rows: 555\n",
"})"
]
},
"metadata": {},
"execution_count": 88
}
],
"source": [
"raw_datasets['train']"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "ea900262-0ead-4843-da63-d1b736a2ad29",
"id": "NeEdChWM9K2i"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['O', 'B', 'I']"
]
},
"metadata": {},
"execution_count": 89
}
],
"source": [
"label_names = ['O','B','I']\n",
"label_names"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e264924f-3c29-4f64-d8f6-4441ca35c9b1",
"id": "FwWqjcpJ9K2i"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"This is a 79 - year - old woman with a history of hypertension , osteoporosis and hysterectomy for myomatosis at the age of 50 . She underwent transurethral resection of infiltrating bladder carcinoma in October 2006 . She subsequently received radiotherapy sessions up to a total of 50 Gy due to persistence of an external tumour mass in the right angle of the bladder , finishing this treatment in June 2007 . In August 2007 she began chemotherapy treatment due to persistence of the bladder lesion and metastases in the spine detected by follow - up CT scan and bone scintigraphy . Her digestive history began in February 2008 when she was admitted for episodes of rectorrhagia , initially scarce and distal , but which soon became more frequent and profuse , accompanied by symptoms of haemodynamic instability and severe anaemia with extensive transfusion requirements . \n",
" Total colonoscopy was performed , showing only changes typical of actinic proctitis with large friable and bleeding neovascular lesions ; treatment was carried out with argon plasma ( APC ) . The patient 's clinical course was unfavourable : she received consecutive treatment with steroid enemas , 5 - aminosalicylic acid and sucralfate ; in addition , three more therapeutic rectoscopies were performed , applying APC treatment , despite which the episodes of profuse rectorrhagia with haemodynamic instability persisted , requiring transfusion of a total of 21 red blood cell concentrates throughout the patient 's hospitalisation period , despite also receiving oral and intravenous ferrotherapy . \n",
" In the absence of response to these treatments , the case was discussed with the surgeon for topical treatment with formalin , who , using spinal anaesthesia and anal dilatation , treated the rectal ampulla for 10 minutes with a 10 % formalin solution 200 ml + 300 ml of water ; The patient 's tolerance to the procedure was excellent , it was carried out without any complications and from that moment onwards the patient was completely asymptomatic without new episodes of haemorrhagic externalisation , haemodynamic instability or new transfusion requirements ; she was discharged and a follow - up colonoscopy was proposed after the treatment , but she did not accept . \n",
" Four months later , she was admitted again for clinical signs of tumour progression , and symptomatic treatment was decided by the Oncology Department , and the patient died , but without recurrence of the rectorrhagia . \n",
"\n",
"\n",
" \n",
"O O O O O O O O O O O O O O O B I O O B O O O O O O O O O O O B B I O O O O O O O O O O O O O O O O O O O O O O B I O O O O O O B O O O O O O O O O O O O O O O O O O O O B I I O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O B I O O O O O O O O O O O O O O O O B I O O B I I I I O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O B I O O O O O O O O O O O O O O O O O O O O O O O O \n"
]
}
],
"source": [
"words = raw_datasets[\"train\"][0][\"tokens\"]\n",
"labels = [int(n) for n in raw_datasets[\"train\"][0][\"ner_tags\"]]\n",
"#labels = raw_datasets[\"train\"][0][\"pos_tags\"]\n",
"#labels = raw_datasets[\"train\"][0][\"chunk_tags\"]\n",
"line1 = \"\"\n",
"line2 = \"\"\n",
"for word, label in zip(words, labels):\n",
" full_label = label_names[label]\n",
" max_length = max(len(word), len(full_label))\n",
" line1 += word + \" \" * (max_length - len(word) + 1)\n",
" line2 += full_label + \" \" * (max_length - len(full_label) + 1)\n",
"\n",
"print(line1)\n",
"print(line2)"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Bhx4OJLrKpLg",
"outputId": "08eb850e-d3a5-41e0-9dcf-89f7e7748b02"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Token indices sequence length is longer than the specified maximum sequence length for this model (578 > 512). Running this sequence through the model will result in indexing errors\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['[CLS]',\n",
" 'this',\n",
" 'is',\n",
" 'a',\n",
" '79',\n",
" '-',\n",
" 'year',\n",
" '-',\n",
" 'old',\n",
" 'woman',\n",
" 'with',\n",
" 'a',\n",
" 'history',\n",
" 'of',\n",
" 'hy',\n",
" '##pert',\n",
" '##ension',\n",
" ',',\n",
" 'os',\n",
" '##te',\n",
" '##op',\n",
" '##oros',\n",
" '##is',\n",
" 'and',\n",
" 'hy',\n",
" '##ster',\n",
" '##ect',\n",
" '##omy',\n",
" 'for',\n",
" 'my',\n",
" '##oma',\n",
" '##tos',\n",
" '##is',\n",
" 'at',\n",
" 'the',\n",
" 'age',\n",
" 'of',\n",
" '50',\n",
" '.',\n",
" 'she',\n",
" 'underwent',\n",
" 'trans',\n",
" '##ure',\n",
" '##th',\n",
" '##ral',\n",
" 'res',\n",
" '##ection',\n",
" 'of',\n",
" 'in',\n",
" '##fil',\n",
" '##trat',\n",
" '##ing',\n",
" 'blad',\n",
" '##der',\n",
" 'car',\n",
" '##cino',\n",
" '##ma',\n",
" 'in',\n",
" 'o',\n",
" '##cto',\n",
" '##ber',\n",
" '2006',\n",
" '.',\n",
" 'she',\n",
" 'subsequently',\n",
" 'received',\n",
" 'radio',\n",
" '##ther',\n",
" '##ap',\n",
" '##y',\n",
" 'sessions',\n",
" 'up',\n",
" 'to',\n",
" 'a',\n",
" 'total',\n",
" 'of',\n",
" '50',\n",
" 'g',\n",
" '##y',\n",
" 'due',\n",
" 'to',\n",
" 'pers',\n",
" '##isten',\n",
" '##ce',\n",
" 'of',\n",
" 'an',\n",
" 'external',\n",
" 'tu',\n",
" '##mou',\n",
" '##r',\n",
" 'mass',\n",
" 'in',\n",
" 'the',\n",
" 'right',\n",
" 'angle',\n",
" 'of',\n",
" 'the',\n",
" 'blad',\n",
" '##der',\n",
" ',',\n",
" 'finishing',\n",
" 'this',\n",
" 'treatment',\n",
" 'in',\n",
" 'ju',\n",
" '##ne',\n",
" '2007',\n",
" '.',\n",
" 'in',\n",
" 'august',\n",
" '2007',\n",
" 'she',\n",
" 'began',\n",
" 'che',\n",
" '##mot',\n",
" '##hera',\n",
" '##py',\n",
" 'treatment',\n",
" 'due',\n",
" 'to',\n",
" 'pers',\n",
" '##isten',\n",
" '##ce',\n",
" 'of',\n",
" 'the',\n",
" 'blad',\n",
" '##der',\n",
" 'les',\n",
" '##ion',\n",
" 'and',\n",
" 'meta',\n",
" '##stas',\n",
" '##es',\n",
" 'in',\n",
" 'the',\n",
" 'spin',\n",
" '##e',\n",
" 'det',\n",
" '##ected',\n",
" 'by',\n",
" 'follow',\n",
" '-',\n",
" 'up',\n",
" 'c',\n",
" '##t',\n",
" 's',\n",
" '##can',\n",
" 'and',\n",
" 'bone',\n",
" 'sci',\n",
" '##nti',\n",
" '##graphy',\n",
" '.',\n",
" 'her',\n",
" 'dig',\n",
" '##esti',\n",
" '##ve',\n",
" 'history',\n",
" 'began',\n",
" 'in',\n",
" 'februar',\n",
" '##y',\n",
" '2008',\n",
" 'when',\n",
" 'she',\n",
" 'was',\n",
" 'admitted',\n",
" 'for',\n",
" 'episodes',\n",
" 'of',\n",
" 'rector',\n",
" '##r',\n",
" '##ha',\n",
" '##gia',\n",
" ',',\n",
" 'initially',\n",
" 's',\n",
" '##car',\n",
" '##ce',\n",
" 'and',\n",
" 'dis',\n",
" '##tal',\n",
" ',',\n",
" 'but',\n",
" 'which',\n",
" 'soon',\n",
" 'became',\n",
" 'more',\n",
" 'frequent',\n",
" 'and',\n",
" 'prof',\n",
" '##use',\n",
" ',',\n",
" 'accompanied',\n",
" 'by',\n",
" 'symptoms',\n",
" 'of',\n",
" 'hae',\n",
" '##mo',\n",
" '##dyn',\n",
" '##ami',\n",
" '##c',\n",
" 'ins',\n",
" '##tab',\n",
" '##ility',\n",
" 'and',\n",
" 'severe',\n",
" 'ana',\n",
" '##emia',\n",
" 'with',\n",
" 'extensive',\n",
" 'trans',\n",
" '##fus',\n",
" '##ion',\n",
" 'requirements',\n",
" '.',\n",
" 'total',\n",
" 'colonos',\n",
" '##co',\n",
" '##py',\n",
" 'was',\n",
" 'performed',\n",
" ',',\n",
" 'showing',\n",
" 'only',\n",
" 'changes',\n",
" 'typical',\n",
" 'of',\n",
" 'act',\n",
" '##ini',\n",
" '##c',\n",
" 'pro',\n",
" '##cti',\n",
" '##tis',\n",
" 'with',\n",
" 'large',\n",
" 'fri',\n",
" '##able',\n",
" 'and',\n",
" 'ble',\n",
" '##eding',\n",
" 'neo',\n",
" '##vas',\n",
" '##cular',\n",
" 'les',\n",
" '##ions',\n",
" ';',\n",
" 'treatment',\n",
" 'was',\n",
" 'carried',\n",
" 'out',\n",
" 'with',\n",
" 'ar',\n",
" '##gon',\n",
" 'plasma',\n",
" '(',\n",
" 'ap',\n",
" '##c',\n",
" ')',\n",
" '.',\n",
" 'the',\n",
" 'patient',\n",
" \"'\",\n",
" 's',\n",
" 'clinical',\n",
" 'course',\n",
" 'was',\n",
" 'un',\n",
" '##fa',\n",
" '##vour',\n",
" '##able',\n",
" ':',\n",
" 'she',\n",
" 'received',\n",
" 'consecutive',\n",
" 'treatment',\n",
" 'with',\n",
" 'ster',\n",
" '##oid',\n",
" 'ene',\n",
" '##mas',\n",
" ',',\n",
" '5',\n",
" '-',\n",
" 'amino',\n",
" '##sal',\n",
" '##icy',\n",
" '##lic',\n",
" 'acid',\n",
" 'and',\n",
" 'su',\n",
" '##cra',\n",
" '##lfa',\n",
" '##te',\n",
" ';',\n",
" 'in',\n",
" 'addition',\n",
" ',',\n",
" 'three',\n",
" 'more',\n",
" 'the',\n",
" '##rap',\n",
" '##eu',\n",
" '##tic',\n",
" 're',\n",
" '##ctos',\n",
" '##co',\n",
" '##pies',\n",
" 'were',\n",
" 'performed',\n",
" ',',\n",
" 'apply',\n",
" '##ing',\n",
" 'ap',\n",
" '##c',\n",
" 'treatment',\n",
" ',',\n",
" 'despite',\n",
" 'which',\n",
" 'the',\n",
" 'episodes',\n",
" 'of',\n",
" 'prof',\n",
" '##use',\n",
" 'rector',\n",
" '##r',\n",
" '##ha',\n",
" '##gia',\n",
" 'with',\n",
" 'hae',\n",
" '##mo',\n",
" '##dyn',\n",
" '##ami',\n",
" '##c',\n",
" 'ins',\n",
" '##tab',\n",
" '##ility',\n",
" 'pers',\n",
" '##isted',\n",
" ',',\n",
" 'requiring',\n",
" 'trans',\n",
" '##fus',\n",
" '##ion',\n",
" 'of',\n",
" 'a',\n",
" 'total',\n",
" 'of',\n",
" '21',\n",
" 'red',\n",
" 'blood',\n",
" 'cell',\n",
" 'con',\n",
" '##centra',\n",
" '##tes',\n",
" 'throughout',\n",
" 'the',\n",
" 'patient',\n",
" \"'\",\n",
" 's',\n",
" 'hospital',\n",
" '##isation',\n",
" 'period',\n",
" ',',\n",
" 'despite',\n",
" 'also',\n",
" 'receiving',\n",
" 'oral',\n",
" 'and',\n",
" 'intra',\n",
" '##veno',\n",
" '##us',\n",
" 'ferro',\n",
" '##ther',\n",
" '##ap',\n",
" '##y',\n",
" '.',\n",
" 'in',\n",
" 'the',\n",
" 'absence',\n",
" 'of',\n",
" 'response',\n",
" 'to',\n",
" 'these',\n",
" 'treatment',\n",
" '##s',\n",
" ',',\n",
" 'the',\n",
" 'case',\n",
" 'was',\n",
" 'discussed',\n",
" 'with',\n",
" 'the',\n",
" 'surgeon',\n",
" 'for',\n",
" 'topic',\n",
" '##al',\n",
" 'treatment',\n",
" 'with',\n",
" 'formal',\n",
" '##in',\n",
" ',',\n",
" 'who',\n",
" ',',\n",
" 'using',\n",
" 'spin',\n",
" '##al',\n",
" 'ana',\n",
" '##est',\n",
" '##hes',\n",
" '##ia',\n",
" 'and',\n",
" 'anal',\n",
" 'dil',\n",
" '##ata',\n",
" '##tion',\n",
" ',',\n",
" 'treated',\n",
" 'the',\n",
" 'recta',\n",
" '##l',\n",
" 'am',\n",
" '##pul',\n",
" '##la',\n",
" 'for',\n",
" '10',\n",
" 'minutes',\n",
" 'with',\n",
" 'a',\n",
" '10',\n",
" '%',\n",
" 'formal',\n",
" '##in',\n",
" 'solution',\n",
" '200',\n",
" 'ml',\n",
" '+',\n",
" '300',\n",
" 'ml',\n",
" 'of',\n",
" 'water',\n",
" ';',\n",
" 'the',\n",
" 'patient',\n",
" \"'\",\n",
" 's',\n",
" 'tol',\n",
" '##erance',\n",
" 'to',\n",
" 'the',\n",
" 'procedure',\n",
" 'was',\n",
" 'excellent',\n",
" ',',\n",
" 'it',\n",
" 'was',\n",
" 'carried',\n",
" 'out',\n",
" 'without',\n",
" 'any',\n",
" 'com',\n",
" '##plications',\n",
" 'and',\n",
" 'from',\n",
" 'that',\n",
" 'moment',\n",
" 'onwards',\n",
" 'the',\n",
" 'patient',\n",
" 'was',\n",
" 'completely',\n",
" 'as',\n",
" '##ym',\n",
" '##pt',\n",
" '##oma',\n",
" '##tic',\n",
" 'without',\n",
" 'new',\n",
" 'episodes',\n",
" 'of',\n",
" 'hae',\n",
" '##mor',\n",
" '##r',\n",
" '##ha',\n",
" '##gic',\n",
" 'external',\n",
" '##isation',\n",
" ',',\n",
" 'hae',\n",
" '##mo',\n",
" '##dyn',\n",
" '##ami',\n",
" '##c',\n",
" 'ins',\n",
" '##tab',\n",
" '##ility',\n",
" 'or',\n",
" 'new',\n",
" 'trans',\n",
" '##fus',\n",
" '##ion',\n",
" 'requirements',\n",
" ';',\n",
" 'she',\n",
" 'was',\n",
" 'disc',\n",
" '##harge',\n",
" '##d',\n",
" 'and',\n",
" 'a',\n",
" 'follow',\n",
" '-',\n",
" 'up',\n",
" 'colonos',\n",
" '##co',\n",
" '##py',\n",
" 'was',\n",
" 'proposed',\n",
" 'after',\n",
" 'the',\n",
" 'treatment',\n",
" ',',\n",
" 'but',\n",
" 'she',\n",
" 'did',\n",
" 'not',\n",
" 'accept',\n",
" '.',\n",
" 'four',\n",
" 'months',\n",
" 'later',\n",
" ',',\n",
" 'she',\n",
" 'was',\n",
" 'admitted',\n",
" 'again',\n",
" 'for',\n",
" 'clinical',\n",
" 'signs',\n",
" 'of',\n",
" 'tu',\n",
" '##mou',\n",
" '##r',\n",
" 'progression',\n",
" ',',\n",
" 'and',\n",
" 'sy',\n",
" '##mpt',\n",
" '##oma',\n",
" '##tic',\n",
" 'treatment',\n",
" 'was',\n",
" 'decided',\n",
" 'by',\n",
" 'the',\n",
" 'on',\n",
" '##cology',\n",
" 'department',\n",
" ',',\n",
" 'and',\n",
" 'the',\n",
" 'patient',\n",
" 'died',\n",
" ',',\n",
" 'but',\n",
" 'without',\n",
" 're',\n",
" '##cu',\n",
" '##rren',\n",
" '##ce',\n",
" 'of',\n",
" 'the',\n",
" 'rector',\n",
" '##r',\n",
" '##ha',\n",
" '##gia',\n",
" '.',\n",
" '[SEP]']"
]
},
"metadata": {},
"execution_count": 91
}
],
"source": [
"inputs = tokenizer(raw_datasets[\"train\"][0][\"tokens\"], is_split_into_words=True)\n",
"inputs.tokens()"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "EQskMV19KpLh",
"outputId": "9fbf1919-5853-4cd7-ea80-475edeb3b482"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"[-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 0, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -100]\n"
]
}
],
"source": [
"labels = raw_datasets[\"train\"][0][\"ner_tags\"]\n",
"word_ids = inputs.word_ids()\n",
"print(labels)\n",
"print(align_labels_with_tokens(labels, word_ids))"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 81,
"referenced_widgets": [
"84631a71d2ca4ae8a781019ea3ce6da9",
"9666c2ff4f32449ea3cbef076a166836",
"efc4af4547804d9daae235691942e73a",
"c8194cd34f554a789359eab6e7596291",
"54f649d297ec4456be5b5df14497fb93",
"e35bc6f834c4490e85ac2ae25d9e922f",
"b166c61328bb49ea803f0d3a7d515d81",
"a2aac740ef3b4f3c913b71c82b408c2c",
"d77e188e43dc4e01b82054f2a6a8e832",
"266b2c90bb4d41198784f016e996066a",
"cc62b20cee8c4a4b8e24576d1c854fbf",
"98bebe04cb254369bb3b6b991d4b2648",
"40080df663cc43749963657150cf632d",
"a04f157a98db4d47b75094b6ef1b0990",
"ba2967950f4c483ea399827046f52963",
"a9e0ad6a141a462fb9bea1c18d447332",
"31fbaf0ffb0845f5800e6fca0353b929",
"4a2c17e757d34547a4a68718ef064073",
"f74d219071ab49479194f1061bf343be",
"a92097360dba4d5c848b48e345b0028e",
"24a164b22a8f4e00944ef05bcec5d032",
"976ade0b37cd43e2aa5aa272dac2445b"
]
},
"id": "z_6q0eitKpLh",
"outputId": "4c9c0ce3-5855-49e3-c4da-bbe068361c63"
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
" 0%| | 0/1 [00:00<?, ?ba/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "84631a71d2ca4ae8a781019ea3ce6da9"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
" 0%| | 0/1 [00:00<?, ?ba/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "98bebe04cb254369bb3b6b991d4b2648"
}
},
"metadata": {}
}
],
"source": [
"tokenized_datasets = raw_datasets.map(\n",
" tokenize_and_align_labels,\n",
" batched=True,\n",
" remove_columns=raw_datasets[\"train\"].column_names,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {
"id": "K3GGBsIIKpLh"
},
"outputs": [],
"source": [
"data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1M1bHT_pKpLi",
"outputId": "19fc29d4-cd88-4fe7-e963-71a59d0434ca"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"tensor([[-100, 0, 0, ..., 0, 0, -100],\n",
" [-100, 0, 0, ..., -100, -100, -100]])"
]
},
"metadata": {},
"execution_count": 95
}
],
"source": [
"batch = data_collator([tokenized_datasets[\"train\"][i] for i in range(2)])\n",
"batch[\"labels\"]"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {
"id": "maAO4nNXKpLi"
},
"outputs": [],
"source": [
"id2label = {str(i): label for i, label in enumerate(label_names)}\n",
"label2id = {v: k for k, v in id2label.items()}"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 954,
"referenced_widgets": [
"21b02caa5dc146b8ac2bd1a282381c7f",
"0c01b39e34744f74ae47d0d4e70638ce",
"82e7357418144359abba3548449c0c08",
"2ff5e18b6d684b99a82676dbf3db6d32",
"cd75e771337843d9b55838502bed9a1b",
"b6a4250c705f4dd8b9f52731cce2a23d",
"4549eb0838864025ac6a0f3da9192818",
"b2d377844c1a4bc09433a94088f5213e",
"fe7e058b9a6944969d83f7e72b398bb1",
"47d5ccd1eafe4ea1a3476e06d998bd74",
"5a17027205cb4c2bbe140e1e96e4b495"
]
},
"id": "Q0T8WOGBKpLi",
"outputId": "d950423b-480b-495e-f02e-667ec124dcb4"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/config.json\n",
"Model config BertConfig {\n",
" \"_name_or_path\": \"pucpr/clinicalnerpt-medical\",\n",
" \"_num_labels\": 3,\n",
" \"architectures\": [\n",
" \"BertForTokenClassification\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"classifier_dropout\": null,\n",
" \"directionality\": \"bidi\",\n",
" \"eos_token_ids\": null,\n",
" \"gradient_checkpointing\": false,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"O\",\n",
" \"1\": \"B-MedicalDevice\",\n",
" \"2\": \"I-MedicalDevice\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"B-MedicalDevice\": 1,\n",
" \"I-MedicalDevice\": 2,\n",
" \"O\": 0\n",
" },\n",
" \"layer_norm_eps\": 1e-12,\n",
" \"max_position_embeddings\": 512,\n",
" \"model_type\": \"bert\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": null,\n",
" \"pooler_fc_size\": 768,\n",
" \"pooler_num_attention_heads\": 12,\n",
" \"pooler_num_fc_layers\": 3,\n",
" \"pooler_size_per_head\": 128,\n",
" \"pooler_type\": \"first_token_transform\",\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 2,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 119547\n",
"}\n",
"\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading: 0%| | 0.00/709M [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "21b02caa5dc146b8ac2bd1a282381c7f"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/pytorch_model.bin\n",
"All model checkpoint weights were used when initializing BertForTokenClassification.\n",
"\n",
"All the weights of BertForTokenClassification were initialized from the model checkpoint at pucpr/clinicalnerpt-medical.\n",
"If your task is similar to the task the model of the checkpoint was trained on, you can already use BertForTokenClassification for predictions without further training.\n"
]
}
],
"source": [
"model = AutoModelForTokenClassification.from_pretrained( \n",
" model_checkpoint\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {
"id": "k6ExcF0UKpLi",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "68661940-56bd-4adc-ec64-37f1db50604c"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"PyTorch: setting up devices\n",
"The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
]
}
],
"source": [
"args = TrainingArguments(\n",
" \"NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased\",\n",
" evaluation_strategy = IntervalStrategy.STEPS,\n",
" eval_steps = 50,\n",
" learning_rate=5e-5,\n",
" num_train_epochs=50,\n",
" weight_decay=0.01,\n",
" metric_for_best_model = 'f1',\n",
" load_best_model_at_end=True\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"id": "gQSIB3FfKpLj",
"outputId": "72ede653-dc87-4230-94b2-805a52f1f5d2"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:310: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" FutureWarning,\n",
"***** Running training *****\n",
" Num examples = 555\n",
" Num Epochs = 50\n",
" Instantaneous batch size per device = 8\n",
" Total train batch size (w. parallel, distributed & accumulation) = 8\n",
" Gradient Accumulation steps = 1\n",
" Total optimization steps = 3500\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='1150' max='3500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [1150/3500 05:32 < 11:20, 3.45 it/s, Epoch 16/50]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>Step</th>\n",
" <th>Training Loss</th>\n",
" <th>Validation Loss</th>\n",
" <th>Precision</th>\n",
" <th>Recall</th>\n",
" <th>F1</th>\n",
" <th>Accuracy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>50</td>\n",
" <td>No log</td>\n",
" <td>0.160077</td>\n",
" <td>0.281675</td>\n",
" <td>0.325786</td>\n",
" <td>0.302129</td>\n",
" <td>0.934798</td>\n",
" </tr>\n",
" <tr>\n",
" <td>100</td>\n",
" <td>No log</td>\n",
" <td>0.156098</td>\n",
" <td>0.440468</td>\n",
" <td>0.425786</td>\n",
" <td>0.433003</td>\n",
" <td>0.941619</td>\n",
" </tr>\n",
" <tr>\n",
" <td>150</td>\n",
" <td>No log</td>\n",
" <td>0.175201</td>\n",
" <td>0.430540</td>\n",
" <td>0.491195</td>\n",
" <td>0.458872</td>\n",
" <td>0.943305</td>\n",
" </tr>\n",
" <tr>\n",
" <td>200</td>\n",
" <td>No log</td>\n",
" <td>0.181505</td>\n",
" <td>0.465023</td>\n",
" <td>0.438994</td>\n",
" <td>0.451634</td>\n",
" <td>0.944750</td>\n",
" </tr>\n",
" <tr>\n",
" <td>250</td>\n",
" <td>No log</td>\n",
" <td>0.198468</td>\n",
" <td>0.437112</td>\n",
" <td>0.487421</td>\n",
" <td>0.460898</td>\n",
" <td>0.942113</td>\n",
" </tr>\n",
" <tr>\n",
" <td>300</td>\n",
" <td>No log</td>\n",
" <td>0.213199</td>\n",
" <td>0.454245</td>\n",
" <td>0.518239</td>\n",
" <td>0.484136</td>\n",
" <td>0.943191</td>\n",
" </tr>\n",
" <tr>\n",
" <td>350</td>\n",
" <td>No log</td>\n",
" <td>0.225870</td>\n",
" <td>0.439457</td>\n",
" <td>0.529560</td>\n",
" <td>0.480319</td>\n",
" <td>0.941974</td>\n",
" </tr>\n",
" <tr>\n",
" <td>400</td>\n",
" <td>No log</td>\n",
" <td>0.252911</td>\n",
" <td>0.509901</td>\n",
" <td>0.453459</td>\n",
" <td>0.480027</td>\n",
" <td>0.945802</td>\n",
" </tr>\n",
" <tr>\n",
" <td>450</td>\n",
" <td>No log</td>\n",
" <td>0.248783</td>\n",
" <td>0.475821</td>\n",
" <td>0.501258</td>\n",
" <td>0.488208</td>\n",
" <td>0.945092</td>\n",
" </tr>\n",
" <tr>\n",
" <td>500</td>\n",
" <td>0.078000</td>\n",
" <td>0.230288</td>\n",
" <td>0.441730</td>\n",
" <td>0.545912</td>\n",
" <td>0.488326</td>\n",
" <td>0.941631</td>\n",
" </tr>\n",
" <tr>\n",
" <td>550</td>\n",
" <td>0.078000</td>\n",
" <td>0.257168</td>\n",
" <td>0.470556</td>\n",
" <td>0.537736</td>\n",
" <td>0.501908</td>\n",
" <td>0.944002</td>\n",
" </tr>\n",
" <tr>\n",
" <td>600</td>\n",
" <td>0.078000</td>\n",
" <td>0.264445</td>\n",
" <td>0.509816</td>\n",
" <td>0.506289</td>\n",
" <td>0.508047</td>\n",
" <td>0.946145</td>\n",
" </tr>\n",
" <tr>\n",
" <td>650</td>\n",
" <td>0.078000</td>\n",
" <td>0.277278</td>\n",
" <td>0.448800</td>\n",
" <td>0.576101</td>\n",
" <td>0.504544</td>\n",
" <td>0.943115</td>\n",
" </tr>\n",
" <tr>\n",
" <td>700</td>\n",
" <td>0.078000</td>\n",
" <td>0.278563</td>\n",
" <td>0.504016</td>\n",
" <td>0.473585</td>\n",
" <td>0.488327</td>\n",
" <td>0.946145</td>\n",
" </tr>\n",
" <tr>\n",
" <td>750</td>\n",
" <td>0.078000</td>\n",
" <td>0.295773</td>\n",
" <td>0.501239</td>\n",
" <td>0.508805</td>\n",
" <td>0.504994</td>\n",
" <td>0.946436</td>\n",
" </tr>\n",
" <tr>\n",
" <td>800</td>\n",
" <td>0.078000</td>\n",
" <td>0.300880</td>\n",
" <td>0.491329</td>\n",
" <td>0.534591</td>\n",
" <td>0.512048</td>\n",
" <td>0.945333</td>\n",
" </tr>\n",
" <tr>\n",
" <td>850</td>\n",
" <td>0.078000</td>\n",
" <td>0.315745</td>\n",
" <td>0.503695</td>\n",
" <td>0.514465</td>\n",
" <td>0.509023</td>\n",
" <td>0.944154</td>\n",
" </tr>\n",
" <tr>\n",
" <td>900</td>\n",
" <td>0.078000</td>\n",
" <td>0.303611</td>\n",
" <td>0.461154</td>\n",
" <td>0.522642</td>\n",
" <td>0.489976</td>\n",
" <td>0.944839</td>\n",
" </tr>\n",
" <tr>\n",
" <td>950</td>\n",
" <td>0.078000</td>\n",
" <td>0.309813</td>\n",
" <td>0.502151</td>\n",
" <td>0.513836</td>\n",
" <td>0.507927</td>\n",
" <td>0.947552</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1000</td>\n",
" <td>0.009800</td>\n",
" <td>0.335969</td>\n",
" <td>0.520309</td>\n",
" <td>0.507547</td>\n",
" <td>0.513849</td>\n",
" <td>0.946829</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1050</td>\n",
" <td>0.009800</td>\n",
" <td>0.313057</td>\n",
" <td>0.484645</td>\n",
" <td>0.545912</td>\n",
" <td>0.513458</td>\n",
" <td>0.946018</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1100</td>\n",
" <td>0.009800</td>\n",
" <td>0.297767</td>\n",
" <td>0.478187</td>\n",
" <td>0.530818</td>\n",
" <td>0.503130</td>\n",
" <td>0.947780</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1150</td>\n",
" <td>0.009800</td>\n",
" <td>0.317451</td>\n",
" <td>0.496936</td>\n",
" <td>0.510063</td>\n",
" <td>0.503414</td>\n",
" <td>0.946601</td>\n",
" </tr>\n",
" </tbody>\n",
"</table><p>"
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500\n",
"Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/config.json\n",
"Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/pytorch_model.bin\n",
"tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/tokenizer_config.json\n",
"Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/special_tokens_map.json\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000\n",
"Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/config.json\n",
"Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/pytorch_model.bin\n",
"tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/tokenizer_config.json\n",
"Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/special_tokens_map.json\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 186\n",
" Batch size = 8\n",
"\n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"Loading best model from NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000 (score: 0.5138490926456541).\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"TrainOutput(global_step=1150, training_loss=0.03881311048632083, metrics={'train_runtime': 332.8101, 'train_samples_per_second': 83.381, 'train_steps_per_second': 10.517, 'total_flos': 2382353890443360.0, 'train_loss': 0.03881311048632083, 'epoch': 16.43})"
]
},
"metadata": {},
"execution_count": 99
}
],
"source": [
"trainer = Trainer(\n",
" model=model,\n",
" args=args,\n",
" train_dataset=tokenized_datasets[\"train\"],\n",
" eval_dataset=tokenized_datasets[\"validation\"],\n",
" data_collator=data_collator,\n",
" compute_metrics=compute_metrics,\n",
" tokenizer=tokenizer,\n",
" callbacks = [EarlyStoppingCallback(early_stopping_patience=3)]\n",
")\n",
"trainer.train()"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "arJA0rVIKpLj",
"outputId": "12b3b4d0-2f59-4f46-9f20-e2aa3cd35398"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Saving model checkpoint to model/multilingual-BERT-all-tokens\n",
"Configuration saved in model/multilingual-BERT-all-tokens/config.json\n",
"Model weights saved in model/multilingual-BERT-all-tokens/pytorch_model.bin\n",
"tokenizer config file saved in model/multilingual-BERT-all-tokens/tokenizer_config.json\n",
"Special tokens file saved in model/multilingual-BERT-all-tokens/special_tokens_map.json\n"
]
}
],
"source": [
"trainer.save_model('model/multilingual-BERT-all-tokens')"
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "D3Bdj1H9TxOp"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "kCThHmWQWZ57"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Model 2 - pucpr/clinicalnerpt-medical\n",
"\n",
"### Sentence Based Modelling"
],
"metadata": {
"id": "5fN4MRRx_fFZ"
}
},
{
"cell_type": "code",
"source": [
"dic = {\"tokens\": sent_tokenized, \"ner_tags\": label_sent_tokenized} #Use this option if you want to check the model performance with sentences tokenized by \". \" b"
],
"metadata": {
"id": "_E60UP1w_fFZ"
},
"execution_count": 101,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {
"id": "DY7gaYbj_fFa"
},
"outputs": [],
"source": [
"dataset = Dataset.from_dict(dic)"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "44a9f0cc-c1cf-4064-aa89-baffa435cf52",
"id": "FcLEVVcn_fFa"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Dataset({\n",
" features: ['tokens', 'ner_tags'],\n",
" num_rows: 11668\n",
"})"
]
},
"metadata": {},
"execution_count": 103
}
],
"source": [
"dataset"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {
"id": "vZqWyVo2_fFa"
},
"outputs": [],
"source": [
"#For training, validation, and test partitions\n",
"\"\"\"\n",
"#Train, val, test partitions\n",
"train_test = dataset.train_test_split()\n",
"test_val = train_test['test'].train_test_split()\n",
"raw_datasets = DatasetDict({\n",
" 'train': train_test['train'],\n",
" 'validation': test_val['train'],\n",
" 'test': test_val['test']\n",
" })\n",
"\"\"\"\n",
"\n",
"#Just for training and validation partitions\n",
"train_test = dataset.train_test_split()\n",
"raw_datasets = DatasetDict({\n",
" 'train': train_test['train'],\n",
" 'validation': train_test['test']\n",
" })"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "ddadc450-b61a-43f9-e5fd-90c8db32de6b",
"id": "GDv9hwpm_fFa"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"DatasetDict({\n",
" train: Dataset({\n",
" features: ['tokens', 'ner_tags'],\n",
" num_rows: 8751\n",
" })\n",
" validation: Dataset({\n",
" features: ['tokens', 'ner_tags'],\n",
" num_rows: 2917\n",
" })\n",
"})"
]
},
"metadata": {},
"execution_count": 105
}
],
"source": [
"raw_datasets"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "54b152d5-edaa-482d-afb5-a209403c7dbc",
"id": "ZnB_fXql_fFa"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
]
},
"metadata": {},
"execution_count": 106
}
],
"source": [
"raw_datasets[\"train\"][0][\"ner_tags\"]\n",
"#raw_datasets[\"train\"][0][\"pos_tags\"]\n",
"#raw_datasets[\"train\"][0][\"chunk_tags\"]"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "a7beafe3-27ce-4e03-904a-3b8a8e06d831",
"id": "egmgQAVt_fFa"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Dataset({\n",
" features: ['tokens', 'ner_tags'],\n",
" num_rows: 8751\n",
"})"
]
},
"metadata": {},
"execution_count": 107
}
],
"source": [
"raw_datasets['train']"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e0cd0121-e53b-4d8c-9b33-2d093d604378",
"id": "Hize89uK_fFa"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['O', 'B', 'I']"
]
},
"metadata": {},
"execution_count": 108
}
],
"source": [
"label_names = ['O','B','I']\n",
"label_names"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "a9fe31c8-45c9-4873-fb2c-cf24bd7c64a9",
"id": "bQf54Sst_fFa"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Since 2006 she had tried several times to lose weight , without success . \n",
"O O O O O O O O O O O O O O \n"
]
}
],
"source": [
"words = raw_datasets[\"train\"][0][\"tokens\"]\n",
"labels = [int(n) for n in raw_datasets[\"train\"][0][\"ner_tags\"]]\n",
"#labels = raw_datasets[\"train\"][0][\"pos_tags\"]\n",
"#labels = raw_datasets[\"train\"][0][\"chunk_tags\"]\n",
"line1 = \"\"\n",
"line2 = \"\"\n",
"for word, label in zip(words, labels):\n",
" full_label = label_names[label]\n",
" max_length = max(len(word), len(full_label))\n",
" line1 += word + \" \" * (max_length - len(word) + 1)\n",
" line2 += full_label + \" \" * (max_length - len(full_label) + 1)\n",
"\n",
"print(line1)\n",
"print(line2)"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "53863be6-1112-4c14-b34b-529492b236dc",
"id": "cNKxSKCT_fFb"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/config.json\n",
"Model config BertConfig {\n",
" \"_name_or_path\": \"pucpr/clinicalnerpt-medical\",\n",
" \"_num_labels\": 3,\n",
" \"architectures\": [\n",
" \"BertForTokenClassification\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"classifier_dropout\": null,\n",
" \"directionality\": \"bidi\",\n",
" \"eos_token_ids\": null,\n",
" \"gradient_checkpointing\": false,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"O\",\n",
" \"1\": \"B-MedicalDevice\",\n",
" \"2\": \"I-MedicalDevice\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"B-MedicalDevice\": 1,\n",
" \"I-MedicalDevice\": 2,\n",
" \"O\": 0\n",
" },\n",
" \"layer_norm_eps\": 1e-12,\n",
" \"max_position_embeddings\": 512,\n",
" \"model_type\": \"bert\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": null,\n",
" \"pooler_fc_size\": 768,\n",
" \"pooler_num_attention_heads\": 12,\n",
" \"pooler_num_fc_layers\": 3,\n",
" \"pooler_size_per_head\": 128,\n",
" \"pooler_type\": \"first_token_transform\",\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 2,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 119547\n",
"}\n",
"\n",
"loading file vocab.txt from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/vocab.txt\n",
"loading file tokenizer.json from cache at None\n",
"loading file added_tokens.json from cache at None\n",
"loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/special_tokens_map.json\n",
"loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/tokenizer_config.json\n",
"loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/config.json\n",
"Model config BertConfig {\n",
" \"_name_or_path\": \"pucpr/clinicalnerpt-medical\",\n",
" \"_num_labels\": 3,\n",
" \"architectures\": [\n",
" \"BertForTokenClassification\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"classifier_dropout\": null,\n",
" \"directionality\": \"bidi\",\n",
" \"eos_token_ids\": null,\n",
" \"gradient_checkpointing\": false,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"O\",\n",
" \"1\": \"B-MedicalDevice\",\n",
" \"2\": \"I-MedicalDevice\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"B-MedicalDevice\": 1,\n",
" \"I-MedicalDevice\": 2,\n",
" \"O\": 0\n",
" },\n",
" \"layer_norm_eps\": 1e-12,\n",
" \"max_position_embeddings\": 512,\n",
" \"model_type\": \"bert\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": null,\n",
" \"pooler_fc_size\": 768,\n",
" \"pooler_num_attention_heads\": 12,\n",
" \"pooler_num_fc_layers\": 3,\n",
" \"pooler_size_per_head\": 128,\n",
" \"pooler_type\": \"first_token_transform\",\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 2,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 119547\n",
"}\n",
"\n",
"loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/config.json\n",
"Model config BertConfig {\n",
" \"_name_or_path\": \"pucpr/clinicalnerpt-medical\",\n",
" \"_num_labels\": 3,\n",
" \"architectures\": [\n",
" \"BertForTokenClassification\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"classifier_dropout\": null,\n",
" \"directionality\": \"bidi\",\n",
" \"eos_token_ids\": null,\n",
" \"gradient_checkpointing\": false,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"O\",\n",
" \"1\": \"B-MedicalDevice\",\n",
" \"2\": \"I-MedicalDevice\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"B-MedicalDevice\": 1,\n",
" \"I-MedicalDevice\": 2,\n",
" \"O\": 0\n",
" },\n",
" \"layer_norm_eps\": 1e-12,\n",
" \"max_position_embeddings\": 512,\n",
" \"model_type\": \"bert\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": null,\n",
" \"pooler_fc_size\": 768,\n",
" \"pooler_num_attention_heads\": 12,\n",
" \"pooler_num_fc_layers\": 3,\n",
" \"pooler_size_per_head\": 128,\n",
" \"pooler_type\": \"first_token_transform\",\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 2,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 119547\n",
"}\n",
"\n"
]
}
],
"source": [
"model_checkpoint = \"pucpr/clinicalnerpt-medical\"\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, add_prefix_space=True)"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "1b408280-d763-4487-e0c0-06823ddc8daa",
"id": "Gm0lsFPN_fFb"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 111
}
],
"source": [
"tokenizer.is_fast"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "a3d8f036-bf64-4f4d-e793-9244e9bd1043",
"id": "5Awp28lM_fFb"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['[CLS]',\n",
" 'since',\n",
" '2006',\n",
" 'she',\n",
" 'had',\n",
" 'tried',\n",
" 'several',\n",
" 'times',\n",
" 'to',\n",
" 'lose',\n",
" 'weight',\n",
" ',',\n",
" 'without',\n",
" 'success',\n",
" '.',\n",
" '[SEP]']"
]
},
"metadata": {},
"execution_count": 112
}
],
"source": [
"inputs = tokenizer(raw_datasets[\"train\"][0][\"tokens\"], is_split_into_words=True)\n",
"inputs.tokens()"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "65b0481e-76fe-4f0a-e7e6-daa6cee662d1",
"id": "Oqi5M1ll_fFb"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"[-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -100]\n"
]
}
],
"source": [
"labels = raw_datasets[\"train\"][0][\"ner_tags\"]\n",
"word_ids = inputs.word_ids()\n",
"print(labels)\n",
"print(align_labels_with_tokens(labels, word_ids))"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 81,
"referenced_widgets": [
"49642b493a2d4c3592ed010663ef789c",
"fcc50d75dfb04fccb26c9b93bf8f1efa",
"dc630459e6564d69833d46b63493a160",
"31d0648af26b4fe797f2cb2ff21336a8",
"90e567bcc88445f695a896af6d8da649",
"2e815baaae8940ffb90e2aaf5c6f7e2a",
"32250602bfc140d18859e6b48f9dbfbc",
"a85179eeb7d94bba8c79a10a734d8c6b",
"7346605d4df548afb179d489217990ff",
"6b1a06b7d6ea43da8b9af7f92a882455",
"a1b4b4cc26ff4c50b4ef5c99d8c7cb3e",
"7b3ce57b6b5e4253b8219adc2c6ff47e",
"ee7bc9576ea44b97a6ad9d2ae8adaec7",
"c012f774a21e44188365f5b0646b422e",
"5899086ed64c4d3185374a7f541e22fe",
"3aaf8990b0574a9183b9902eb33670a5",
"cc3d6c6b95ab4c80983b3ce2175acb8a",
"873c876ae8f64b9bba4f25efa3a3859a",
"d727d1396df2443e9e46ab7e0c7d5276",
"cda8e993793c4b949910e309a3f50a03",
"5f38c6b988b44678a7b7f06a99daa983",
"7b53cf7c82a6439eb94d5f0635afe2f3"
]
},
"outputId": "774134e7-c481-40c4-df12-902897c28ad6",
"id": "fVvvny2D_fFb"
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
" 0%| | 0/9 [00:00<?, ?ba/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "49642b493a2d4c3592ed010663ef789c"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
" 0%| | 0/3 [00:00<?, ?ba/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "7b3ce57b6b5e4253b8219adc2c6ff47e"
}
},
"metadata": {}
}
],
"source": [
"tokenized_datasets = raw_datasets.map(\n",
" tokenize_and_align_labels,\n",
" batched=True,\n",
" remove_columns=raw_datasets[\"train\"].column_names,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {
"id": "MKc1_p7W_fFb"
},
"outputs": [],
"source": [
"data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e4e50bbf-a7bd-485b-834b-05949304f311",
"id": "-u-nUkGH_fFb"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"tensor([[-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, -100, -100, -100, -100, -100, -100, -100, -100, -100,\n",
" -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100],\n",
" [-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, -100]])"
]
},
"metadata": {},
"execution_count": 116
}
],
"source": [
"batch = data_collator([tokenized_datasets[\"train\"][i] for i in range(2)])\n",
"batch[\"labels\"]"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {
"id": "_hW6-lz2_fFb"
},
"outputs": [],
"source": [
"id2label = {str(i): label for i, label in enumerate(label_names)}\n",
"label2id = {v: k for k, v in id2label.items()}"
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "4b8291fc-97e1-4d18-9713-7350ec2f075e",
"id": "7zhu1mtB_fFb"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/config.json\n",
"Model config BertConfig {\n",
" \"_name_or_path\": \"pucpr/clinicalnerpt-medical\",\n",
" \"_num_labels\": 3,\n",
" \"architectures\": [\n",
" \"BertForTokenClassification\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"classifier_dropout\": null,\n",
" \"directionality\": \"bidi\",\n",
" \"eos_token_ids\": null,\n",
" \"gradient_checkpointing\": false,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"O\",\n",
" \"1\": \"B-MedicalDevice\",\n",
" \"2\": \"I-MedicalDevice\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"B-MedicalDevice\": 1,\n",
" \"I-MedicalDevice\": 2,\n",
" \"O\": 0\n",
" },\n",
" \"layer_norm_eps\": 1e-12,\n",
" \"max_position_embeddings\": 512,\n",
" \"model_type\": \"bert\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": null,\n",
" \"pooler_fc_size\": 768,\n",
" \"pooler_num_attention_heads\": 12,\n",
" \"pooler_num_fc_layers\": 3,\n",
" \"pooler_size_per_head\": 128,\n",
" \"pooler_type\": \"first_token_transform\",\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 2,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 119547\n",
"}\n",
"\n",
"loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--pucpr--clinicalnerpt-medical/snapshots/0d889f90b203734b0ba45904781a6779c8eac2b9/pytorch_model.bin\n",
"All model checkpoint weights were used when initializing BertForTokenClassification.\n",
"\n",
"All the weights of BertForTokenClassification were initialized from the model checkpoint at pucpr/clinicalnerpt-medical.\n",
"If your task is similar to the task the model of the checkpoint was trained on, you can already use BertForTokenClassification for predictions without further training.\n"
]
}
],
"source": [
"model = AutoModelForTokenClassification.from_pretrained( \n",
" model_checkpoint\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "7f9279a0-4c0a-41ea-c39c-caf895886d2a",
"id": "-4pqS6QR_fFc"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"PyTorch: setting up devices\n",
"The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
]
}
],
"source": [
"args = TrainingArguments(\n",
" \"NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased\",\n",
" evaluation_strategy = IntervalStrategy.STEPS,\n",
" eval_steps = 50,\n",
" learning_rate=5e-5,\n",
" num_train_epochs=50,\n",
" weight_decay=0.01,\n",
" metric_for_best_model = 'f1',\n",
" load_best_model_at_end=True\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "438ca67b-ebf6-49f3-f3ad-03827a51e196",
"id": "Xrf-cYnW_fFc"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:310: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" FutureWarning,\n",
"***** Running training *****\n",
" Num examples = 8751\n",
" Num Epochs = 50\n",
" Instantaneous batch size per device = 8\n",
" Total train batch size (w. parallel, distributed & accumulation) = 8\n",
" Gradient Accumulation steps = 1\n",
" Total optimization steps = 54700\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='2750' max='54700' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [ 2750/54700 10:53 < 3:26:01, 4.20 it/s, Epoch 2/50]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>Step</th>\n",
" <th>Training Loss</th>\n",
" <th>Validation Loss</th>\n",
" <th>Precision</th>\n",
" <th>Recall</th>\n",
" <th>F1</th>\n",
" <th>Accuracy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>50</td>\n",
" <td>No log</td>\n",
" <td>0.213414</td>\n",
" <td>0.235529</td>\n",
" <td>0.183135</td>\n",
" <td>0.206054</td>\n",
" <td>0.927267</td>\n",
" </tr>\n",
" <tr>\n",
" <td>100</td>\n",
" <td>No log</td>\n",
" <td>0.185129</td>\n",
" <td>0.241594</td>\n",
" <td>0.200724</td>\n",
" <td>0.219271</td>\n",
" <td>0.927296</td>\n",
" </tr>\n",
" <tr>\n",
" <td>150</td>\n",
" <td>No log</td>\n",
" <td>0.175822</td>\n",
" <td>0.340069</td>\n",
" <td>0.304708</td>\n",
" <td>0.321419</td>\n",
" <td>0.933018</td>\n",
" </tr>\n",
" <tr>\n",
" <td>200</td>\n",
" <td>No log</td>\n",
" <td>0.175697</td>\n",
" <td>0.278355</td>\n",
" <td>0.332644</td>\n",
" <td>0.303087</td>\n",
" <td>0.932284</td>\n",
" </tr>\n",
" <tr>\n",
" <td>250</td>\n",
" <td>No log</td>\n",
" <td>0.183824</td>\n",
" <td>0.331652</td>\n",
" <td>0.442318</td>\n",
" <td>0.379073</td>\n",
" <td>0.931647</td>\n",
" </tr>\n",
" <tr>\n",
" <td>300</td>\n",
" <td>No log</td>\n",
" <td>0.197462</td>\n",
" <td>0.431220</td>\n",
" <td>0.228660</td>\n",
" <td>0.298851</td>\n",
" <td>0.932342</td>\n",
" </tr>\n",
" <tr>\n",
" <td>350</td>\n",
" <td>No log</td>\n",
" <td>0.167895</td>\n",
" <td>0.386070</td>\n",
" <td>0.433006</td>\n",
" <td>0.408193</td>\n",
" <td>0.937046</td>\n",
" </tr>\n",
" <tr>\n",
" <td>400</td>\n",
" <td>No log</td>\n",
" <td>0.190235</td>\n",
" <td>0.179594</td>\n",
" <td>0.201242</td>\n",
" <td>0.189802</td>\n",
" <td>0.930726</td>\n",
" </tr>\n",
" <tr>\n",
" <td>450</td>\n",
" <td>No log</td>\n",
" <td>0.182239</td>\n",
" <td>0.341333</td>\n",
" <td>0.463528</td>\n",
" <td>0.393155</td>\n",
" <td>0.936134</td>\n",
" </tr>\n",
" <tr>\n",
" <td>500</td>\n",
" <td>0.197700</td>\n",
" <td>0.161157</td>\n",
" <td>0.377800</td>\n",
" <td>0.383859</td>\n",
" <td>0.380806</td>\n",
" <td>0.938074</td>\n",
" </tr>\n",
" <tr>\n",
" <td>550</td>\n",
" <td>0.197700</td>\n",
" <td>0.158413</td>\n",
" <td>0.422494</td>\n",
" <td>0.375065</td>\n",
" <td>0.397369</td>\n",
" <td>0.940534</td>\n",
" </tr>\n",
" <tr>\n",
" <td>600</td>\n",
" <td>0.197700</td>\n",
" <td>0.155527</td>\n",
" <td>0.395566</td>\n",
" <td>0.350750</td>\n",
" <td>0.371812</td>\n",
" <td>0.939897</td>\n",
" </tr>\n",
" <tr>\n",
" <td>650</td>\n",
" <td>0.197700</td>\n",
" <td>0.159286</td>\n",
" <td>0.323269</td>\n",
" <td>0.321262</td>\n",
" <td>0.322263</td>\n",
" <td>0.938202</td>\n",
" </tr>\n",
" <tr>\n",
" <td>700</td>\n",
" <td>0.197700</td>\n",
" <td>0.171928</td>\n",
" <td>0.381213</td>\n",
" <td>0.503880</td>\n",
" <td>0.434046</td>\n",
" <td>0.933910</td>\n",
" </tr>\n",
" <tr>\n",
" <td>750</td>\n",
" <td>0.197700</td>\n",
" <td>0.147573</td>\n",
" <td>0.323820</td>\n",
" <td>0.305225</td>\n",
" <td>0.314248</td>\n",
" <td>0.942298</td>\n",
" </tr>\n",
" <tr>\n",
" <td>800</td>\n",
" <td>0.197700</td>\n",
" <td>0.148027</td>\n",
" <td>0.425638</td>\n",
" <td>0.439731</td>\n",
" <td>0.432570</td>\n",
" <td>0.943326</td>\n",
" </tr>\n",
" <tr>\n",
" <td>850</td>\n",
" <td>0.197700</td>\n",
" <td>0.147193</td>\n",
" <td>0.408436</td>\n",
" <td>0.395758</td>\n",
" <td>0.401997</td>\n",
" <td>0.943993</td>\n",
" </tr>\n",
" <tr>\n",
" <td>900</td>\n",
" <td>0.197700</td>\n",
" <td>0.164249</td>\n",
" <td>0.404822</td>\n",
" <td>0.495085</td>\n",
" <td>0.445427</td>\n",
" <td>0.943277</td>\n",
" </tr>\n",
" <tr>\n",
" <td>950</td>\n",
" <td>0.197700</td>\n",
" <td>0.155629</td>\n",
" <td>0.408925</td>\n",
" <td>0.289188</td>\n",
" <td>0.338788</td>\n",
" <td>0.940769</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1000</td>\n",
" <td>0.165700</td>\n",
" <td>0.185534</td>\n",
" <td>0.289720</td>\n",
" <td>0.080186</td>\n",
" <td>0.125608</td>\n",
" <td>0.925875</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1050</td>\n",
" <td>0.165700</td>\n",
" <td>0.163427</td>\n",
" <td>0.402687</td>\n",
" <td>0.511640</td>\n",
" <td>0.450672</td>\n",
" <td>0.936379</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1100</td>\n",
" <td>0.165700</td>\n",
" <td>0.148432</td>\n",
" <td>0.446659</td>\n",
" <td>0.428867</td>\n",
" <td>0.437582</td>\n",
" <td>0.944943</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1150</td>\n",
" <td>0.165700</td>\n",
" <td>0.160113</td>\n",
" <td>0.457825</td>\n",
" <td>0.426798</td>\n",
" <td>0.441767</td>\n",
" <td>0.943307</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1200</td>\n",
" <td>0.165700</td>\n",
" <td>0.167280</td>\n",
" <td>0.462996</td>\n",
" <td>0.265391</td>\n",
" <td>0.337389</td>\n",
" <td>0.939074</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1250</td>\n",
" <td>0.165700</td>\n",
" <td>0.147611</td>\n",
" <td>0.443478</td>\n",
" <td>0.448526</td>\n",
" <td>0.445988</td>\n",
" <td>0.946579</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1300</td>\n",
" <td>0.165700</td>\n",
" <td>0.147158</td>\n",
" <td>0.447046</td>\n",
" <td>0.434558</td>\n",
" <td>0.440714</td>\n",
" <td>0.945070</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1350</td>\n",
" <td>0.165700</td>\n",
" <td>0.184782</td>\n",
" <td>0.432701</td>\n",
" <td>0.495603</td>\n",
" <td>0.462021</td>\n",
" <td>0.942092</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1400</td>\n",
" <td>0.165700</td>\n",
" <td>0.164378</td>\n",
" <td>0.409772</td>\n",
" <td>0.381790</td>\n",
" <td>0.395287</td>\n",
" <td>0.944522</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1450</td>\n",
" <td>0.165700</td>\n",
" <td>0.161122</td>\n",
" <td>0.489035</td>\n",
" <td>0.461459</td>\n",
" <td>0.474847</td>\n",
" <td>0.946138</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1500</td>\n",
" <td>0.133400</td>\n",
" <td>0.151911</td>\n",
" <td>0.395260</td>\n",
" <td>0.483187</td>\n",
" <td>0.434823</td>\n",
" <td>0.944914</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1550</td>\n",
" <td>0.133400</td>\n",
" <td>0.160618</td>\n",
" <td>0.470175</td>\n",
" <td>0.485256</td>\n",
" <td>0.477597</td>\n",
" <td>0.945551</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1600</td>\n",
" <td>0.133400</td>\n",
" <td>0.153152</td>\n",
" <td>0.458456</td>\n",
" <td>0.485256</td>\n",
" <td>0.471475</td>\n",
" <td>0.944796</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1650</td>\n",
" <td>0.133400</td>\n",
" <td>0.159076</td>\n",
" <td>0.481323</td>\n",
" <td>0.406622</td>\n",
" <td>0.440830</td>\n",
" <td>0.946981</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1700</td>\n",
" <td>0.133400</td>\n",
" <td>0.147369</td>\n",
" <td>0.451056</td>\n",
" <td>0.486291</td>\n",
" <td>0.468011</td>\n",
" <td>0.948882</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1750</td>\n",
" <td>0.133400</td>\n",
" <td>0.157782</td>\n",
" <td>0.416149</td>\n",
" <td>0.554578</td>\n",
" <td>0.475493</td>\n",
" <td>0.942082</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1800</td>\n",
" <td>0.133400</td>\n",
" <td>0.139047</td>\n",
" <td>0.469262</td>\n",
" <td>0.473875</td>\n",
" <td>0.471557</td>\n",
" <td>0.948186</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1850</td>\n",
" <td>0.133400</td>\n",
" <td>0.169168</td>\n",
" <td>0.467036</td>\n",
" <td>0.392137</td>\n",
" <td>0.426322</td>\n",
" <td>0.947804</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1900</td>\n",
" <td>0.133400</td>\n",
" <td>0.190142</td>\n",
" <td>0.451332</td>\n",
" <td>0.482152</td>\n",
" <td>0.466233</td>\n",
" <td>0.946511</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1950</td>\n",
" <td>0.133400</td>\n",
" <td>0.142872</td>\n",
" <td>0.489979</td>\n",
" <td>0.480600</td>\n",
" <td>0.485244</td>\n",
" <td>0.948431</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2000</td>\n",
" <td>0.127700</td>\n",
" <td>0.141305</td>\n",
" <td>0.490028</td>\n",
" <td>0.444904</td>\n",
" <td>0.466377</td>\n",
" <td>0.948823</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2050</td>\n",
" <td>0.127700</td>\n",
" <td>0.154673</td>\n",
" <td>0.441645</td>\n",
" <td>0.516813</td>\n",
" <td>0.476281</td>\n",
" <td>0.944257</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2100</td>\n",
" <td>0.127700</td>\n",
" <td>0.161482</td>\n",
" <td>0.559639</td>\n",
" <td>0.417486</td>\n",
" <td>0.478222</td>\n",
" <td>0.948402</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2150</td>\n",
" <td>0.127700</td>\n",
" <td>0.155931</td>\n",
" <td>0.482567</td>\n",
" <td>0.494051</td>\n",
" <td>0.488241</td>\n",
" <td>0.946942</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2200</td>\n",
" <td>0.127700</td>\n",
" <td>0.180343</td>\n",
" <td>0.503902</td>\n",
" <td>0.467667</td>\n",
" <td>0.485109</td>\n",
" <td>0.947011</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2250</td>\n",
" <td>0.127700</td>\n",
" <td>0.173788</td>\n",
" <td>0.474372</td>\n",
" <td>0.469219</td>\n",
" <td>0.471782</td>\n",
" <td>0.945678</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2300</td>\n",
" <td>0.127700</td>\n",
" <td>0.172453</td>\n",
" <td>0.415626</td>\n",
" <td>0.533885</td>\n",
" <td>0.467391</td>\n",
" <td>0.942180</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2350</td>\n",
" <td>0.127700</td>\n",
" <td>0.177914</td>\n",
" <td>0.406309</td>\n",
" <td>0.553026</td>\n",
" <td>0.468449</td>\n",
" <td>0.935782</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2400</td>\n",
" <td>0.127700</td>\n",
" <td>0.177702</td>\n",
" <td>0.452438</td>\n",
" <td>0.484739</td>\n",
" <td>0.468032</td>\n",
" <td>0.947794</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2450</td>\n",
" <td>0.127700</td>\n",
" <td>0.149222</td>\n",
" <td>0.504098</td>\n",
" <td>0.445422</td>\n",
" <td>0.472947</td>\n",
" <td>0.947657</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2500</td>\n",
" <td>0.106000</td>\n",
" <td>0.176770</td>\n",
" <td>0.487315</td>\n",
" <td>0.476979</td>\n",
" <td>0.482092</td>\n",
" <td>0.947148</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2550</td>\n",
" <td>0.106000</td>\n",
" <td>0.159687</td>\n",
" <td>0.442256</td>\n",
" <td>0.511123</td>\n",
" <td>0.474202</td>\n",
" <td>0.944669</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2600</td>\n",
" <td>0.106000</td>\n",
" <td>0.185752</td>\n",
" <td>0.529092</td>\n",
" <td>0.503363</td>\n",
" <td>0.515907</td>\n",
" <td>0.949421</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2650</td>\n",
" <td>0.106000</td>\n",
" <td>0.166756</td>\n",
" <td>0.449671</td>\n",
" <td>0.494568</td>\n",
" <td>0.471052</td>\n",
" <td>0.945943</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2700</td>\n",
" <td>0.106000</td>\n",
" <td>0.174073</td>\n",
" <td>0.432521</td>\n",
" <td>0.477496</td>\n",
" <td>0.453897</td>\n",
" <td>0.946609</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2750</td>\n",
" <td>0.106000</td>\n",
" <td>0.186960</td>\n",
" <td>0.526675</td>\n",
" <td>0.439214</td>\n",
" <td>0.478984</td>\n",
" <td>0.947981</td>\n",
" </tr>\n",
" </tbody>\n",
"</table><p>"
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500\n",
"Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/config.json\n",
"Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/pytorch_model.bin\n",
"tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/tokenizer_config.json\n",
"Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-500/special_tokens_map.json\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000\n",
"Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/config.json\n",
"Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/pytorch_model.bin\n",
"tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/tokenizer_config.json\n",
"Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1000/special_tokens_map.json\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500\n",
"Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500/config.json\n",
"Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500/pytorch_model.bin\n",
"tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500/tokenizer_config.json\n",
"Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-1500/special_tokens_map.json\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2000\n",
"Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2000/config.json\n",
"Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2000/pytorch_model.bin\n",
"tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2000/tokenizer_config.json\n",
"Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2000/special_tokens_map.json\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"Saving model checkpoint to NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2500\n",
"Configuration saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2500/config.json\n",
"Model weights saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2500/pytorch_model.bin\n",
"tokenizer config file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2500/tokenizer_config.json\n",
"Special tokens file saved in NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2500/special_tokens_map.json\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 2917\n",
" Batch size = 8\n",
"\n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"Loading best model from NLP-CIC-WFU_DisTEMIST_fine_tuned_bert-base-multilingual-cased/checkpoint-2500 (score: 0.48209150326797384).\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"TrainOutput(global_step=2750, training_loss=0.14159858218106355, metrics={'train_runtime': 653.9792, 'train_samples_per_second': 669.058, 'train_steps_per_second': 83.642, 'total_flos': 861581789561556.0, 'train_loss': 0.14159858218106355, 'epoch': 2.51})"
]
},
"metadata": {},
"execution_count": 120
}
],
"source": [
"trainer = Trainer(\n",
" model=model,\n",
" args=args,\n",
" train_dataset=tokenized_datasets[\"train\"],\n",
" eval_dataset=tokenized_datasets[\"validation\"],\n",
" data_collator=data_collator,\n",
" compute_metrics=compute_metrics,\n",
" tokenizer=tokenizer,\n",
" callbacks = [EarlyStoppingCallback(early_stopping_patience=3)]\n",
")\n",
"trainer.train()"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "977619c0-4fc0-423f-f47a-b308c9fb1ecd",
"id": "RdLDXL1K_fFc"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Saving model checkpoint to model/multilingual-BERT-sentence\n",
"Configuration saved in model/multilingual-BERT-sentence/config.json\n",
"Model weights saved in model/multilingual-BERT-sentence/pytorch_model.bin\n",
"tokenizer config file saved in model/multilingual-BERT-sentence/tokenizer_config.json\n",
"Special tokens file saved in model/multilingual-BERT-sentence/special_tokens_map.json\n"
]
}
],
"source": [
"trainer.save_model('model/multilingual-BERT-sentence')"
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "kFOvQa4MCQdW"
},
"execution_count": 166,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.2"
},
"colab": {
"provenance": [],
"collapsed_sections": [],
"machine_shape": "hm"
},
"accelerator": "GPU",
"gpuClass": "premium",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"b3fe12ca95e84b198d16bdb4d20f9ad9": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_1b7f8f1786394c01bad4a8589ad16513",
"IPY_MODEL_70e437b3ba294189b4799c6607532ebd",
"IPY_MODEL_0fb47d91dbf9497cac1ffc1c5dfd4519"
],
"layout": "IPY_MODEL_9cfec0f21c0a459f9f5888c389a6a479"
}
},
"1b7f8f1786394c01bad4a8589ad16513": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_ef66098fb5f748eabe11abc3fe4ad54d",
"placeholder": "",
"style": "IPY_MODEL_563d8b35192240be960bc08909984119",
"value": "Downloading builder script: "
}
},
"70e437b3ba294189b4799c6607532ebd": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b94385d1423e47f5a9e2351bf873c3e0",
"max": 2472,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_e1b6e7774bc94a87ad23fb53d6c9b985",
"value": 2472
}
},
"0fb47d91dbf9497cac1ffc1c5dfd4519": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_a0b523772cf04a85b0ac000cc9a83c67",
"placeholder": "",
"style": "IPY_MODEL_71a3f1b2112344ea81721e59cce14cec",
"value": " 6.33k/? [00:00<00:00, 198kB/s]"
}
},
"9cfec0f21c0a459f9f5888c389a6a479": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"ef66098fb5f748eabe11abc3fe4ad54d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"563d8b35192240be960bc08909984119": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"b94385d1423e47f5a9e2351bf873c3e0": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e1b6e7774bc94a87ad23fb53d6c9b985": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"a0b523772cf04a85b0ac000cc9a83c67": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"71a3f1b2112344ea81721e59cce14cec": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"a275c54cbefb4438a3015080e8b57999": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_a4696c167a3247bd8fd0727e0556463a",
"IPY_MODEL_afa79c37c031491da9e229c637d80cc4",
"IPY_MODEL_5ddf799b2fd94edc9949d36450a2d5e9"
],
"layout": "IPY_MODEL_592af11564074af19e40bce6680ed7f1"
}
},
"a4696c167a3247bd8fd0727e0556463a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_125b61b8e80d4192a6f19d43ba4797dc",
"placeholder": "",
"style": "IPY_MODEL_d9ff16ada2d94eb7a1adc70e5265ff2d",
"value": "Downloading: 100%"
}
},
"afa79c37c031491da9e229c637d80cc4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_f9bd10de9e2845f08100a29293b92d1c",
"max": 373,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_c28349dbeccc4124b583e0eeec004e6b",
"value": 373
}
},
"5ddf799b2fd94edc9949d36450a2d5e9": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_ff2cf349b07442bd9812dd8c7e82e59b",
"placeholder": "",
"style": "IPY_MODEL_414d466fed0b42378d8b38f10c720eba",
"value": " 373/373 [00:00<00:00, 10.0kB/s]"
}
},
"592af11564074af19e40bce6680ed7f1": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"125b61b8e80d4192a6f19d43ba4797dc": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d9ff16ada2d94eb7a1adc70e5265ff2d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"f9bd10de9e2845f08100a29293b92d1c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"c28349dbeccc4124b583e0eeec004e6b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"ff2cf349b07442bd9812dd8c7e82e59b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"414d466fed0b42378d8b38f10c720eba": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"e432f1e3e5c54358a321a21e9c7aad1f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_cf931d70dc1a4d2ba5f10dba7bf90ece",
"IPY_MODEL_58f1edc459ef4f5bab25544474897db3",
"IPY_MODEL_8894005504364c36964d283cf58bb223"
],
"layout": "IPY_MODEL_78db41a453ce4ff4884960c615147331"
}
},
"cf931d70dc1a4d2ba5f10dba7bf90ece": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_fe8d877f0fc1417baad9838094045475",
"placeholder": "",
"style": "IPY_MODEL_74bfdb85ed55436f8c12bf9b25375533",
"value": "Downloading: 100%"
}
},
"58f1edc459ef4f5bab25544474897db3": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_e813e2a1cb7248b7a8c404d55e4fb248",
"max": 231508,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_cb3c438fb3a6412d80b5ba673a6455cb",
"value": 231508
}
},
"8894005504364c36964d283cf58bb223": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_bb189f5bc189462cad4824a1c30335c0",
"placeholder": "",
"style": "IPY_MODEL_e98b7218049f4310951a1608c52c14e0",
"value": " 232k/232k [00:00<00:00, 2.99MB/s]"
}
},
"78db41a453ce4ff4884960c615147331": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"fe8d877f0fc1417baad9838094045475": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"74bfdb85ed55436f8c12bf9b25375533": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"e813e2a1cb7248b7a8c404d55e4fb248": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"cb3c438fb3a6412d80b5ba673a6455cb": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"bb189f5bc189462cad4824a1c30335c0": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e98b7218049f4310951a1608c52c14e0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"3dd1f27ff0d24a1294534ff7e69a7abb": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_4128d82e19f14e9d9be5416ebc974d0d",
"IPY_MODEL_6676a80dc293456ea7aed4ce3e281d83",
"IPY_MODEL_55a7a4c336884f26a53292d559a06ff8"
],
"layout": "IPY_MODEL_d27469698b1e4ad1ae74ced6f7c3942d"
}
},
"4128d82e19f14e9d9be5416ebc974d0d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_d201490a05c049d38b087008aac0a400",
"placeholder": "",
"style": "IPY_MODEL_703d715a4ef64c4e93cc6496f5340451",
"value": "Downloading: 100%"
}
},
"6676a80dc293456ea7aed4ce3e281d83": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_41d861058e3e458e949f1f3d92623217",
"max": 711494,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_f4d9343bd31d47b1b3dcf0494825be2d",
"value": 711494
}
},
"55a7a4c336884f26a53292d559a06ff8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b75e2a47db2b47dd8740f77b337c308f",
"placeholder": "",
"style": "IPY_MODEL_d8849516ccb44011a7f9e7e745b30c60",
"value": " 711k/711k [00:00<00:00, 6.20MB/s]"
}
},
"d27469698b1e4ad1ae74ced6f7c3942d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d201490a05c049d38b087008aac0a400": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"703d715a4ef64c4e93cc6496f5340451": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"41d861058e3e458e949f1f3d92623217": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f4d9343bd31d47b1b3dcf0494825be2d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"b75e2a47db2b47dd8740f77b337c308f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d8849516ccb44011a7f9e7e745b30c60": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"ae5928c8da4243fba06ae9bf5086ba31": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_e613455bcbb24e36a31666acd83d7b24",
"IPY_MODEL_bac45a33f9b444a1985ef56a9be85c52",
"IPY_MODEL_91ac0673e600400f904b1b10deb86cee"
],
"layout": "IPY_MODEL_289f23dd30814993afde0f5e987fdd9e"
}
},
"e613455bcbb24e36a31666acd83d7b24": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_8e72912c0e434060ac30517a98d07a9e",
"placeholder": "",
"style": "IPY_MODEL_ede252ab2cee4ffbbc2f5519373d1e97",
"value": "Downloading: 100%"
}
},
"bac45a33f9b444a1985ef56a9be85c52": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_f2da21cc1007475ca0233a9e5d146d65",
"max": 125,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_92fd7c43f87142d1bbd05f89ba3bfe39",
"value": 125
}
},
"91ac0673e600400f904b1b10deb86cee": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b58c9bfa30b3421496adb52e082cb50a",
"placeholder": "",
"style": "IPY_MODEL_6cbfa925d26e47139365d10b9b28d96a",
"value": " 125/125 [00:00<00:00, 5.13kB/s]"
}
},
"289f23dd30814993afde0f5e987fdd9e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"8e72912c0e434060ac30517a98d07a9e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"ede252ab2cee4ffbbc2f5519373d1e97": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"f2da21cc1007475ca0233a9e5d146d65": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"92fd7c43f87142d1bbd05f89ba3bfe39": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"b58c9bfa30b3421496adb52e082cb50a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6cbfa925d26e47139365d10b9b28d96a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"d161016f9fea41e6b27eb537c12d0703": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_f315bcfdc76848cb8e851a2698e0248b",
"IPY_MODEL_ef152955607540f2a7d38bf9e2207eec",
"IPY_MODEL_456c36425ac94dc294f8402c07668a51"
],
"layout": "IPY_MODEL_4d305d32efdf4b639e65e816a7132597"
}
},
"f315bcfdc76848cb8e851a2698e0248b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_5c3a10b039c344509be9867ca40a8472",
"placeholder": "",
"style": "IPY_MODEL_6663eac35b7a4043b97edb90a555e3d9",
"value": " 0%"
}
},
"ef152955607540f2a7d38bf9e2207eec": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "danger",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_30bce58edba74043abc1a2625c492d4a",
"max": 1,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_d4b6dbbad9c946ed99b6c6e587bfb6da",
"value": 0
}
},
"456c36425ac94dc294f8402c07668a51": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_8976a59e4ea049088f92a37f7547e16e",
"placeholder": "",
"style": "IPY_MODEL_6a689955d9b3463abaaaa03b62d3cf69",
"value": " 0/1 [00:00<?, ?ba/s]"
}
},
"4d305d32efdf4b639e65e816a7132597": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"5c3a10b039c344509be9867ca40a8472": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6663eac35b7a4043b97edb90a555e3d9": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"30bce58edba74043abc1a2625c492d4a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d4b6dbbad9c946ed99b6c6e587bfb6da": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"8976a59e4ea049088f92a37f7547e16e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6a689955d9b3463abaaaa03b62d3cf69": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"9d65a59161cd401aad05f4a52d51c724": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_6f244b91a3884eb5b0fbd577ed5d1710",
"IPY_MODEL_2b6ad660dd1f4c78855433118b9fb61e",
"IPY_MODEL_101fa9a9581a46d8b1e0951f03796740"
],
"layout": "IPY_MODEL_7ffe4378bc7b410780780dd51d0705ea"
}
},
"6f244b91a3884eb5b0fbd577ed5d1710": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_77c422e831944566a6529da37645ef6d",
"placeholder": "",
"style": "IPY_MODEL_9a88121d0138438980f1c7e4341f480a",
"value": " 0%"
}
},
"2b6ad660dd1f4c78855433118b9fb61e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "danger",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_52600cdbf4804b148e02724ae4902de5",
"max": 1,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_11f16a0c34e64d6494ac1d2550d18f8f",
"value": 0
}
},
"101fa9a9581a46d8b1e0951f03796740": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_645616ac236e479c8303a56100d26d51",
"placeholder": "",
"style": "IPY_MODEL_859e35e323f0407fbdea9eb7ae953742",
"value": " 0/1 [00:00<?, ?ba/s]"
}
},
"7ffe4378bc7b410780780dd51d0705ea": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"77c422e831944566a6529da37645ef6d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9a88121d0138438980f1c7e4341f480a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"52600cdbf4804b148e02724ae4902de5": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"11f16a0c34e64d6494ac1d2550d18f8f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"645616ac236e479c8303a56100d26d51": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"859e35e323f0407fbdea9eb7ae953742": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"40c2b37fa07f44648cecc9b7e406e7e2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_69ed5fe8ed6046acb4202689c065f858",
"IPY_MODEL_bbd9cf7a77aa48fda3a648583ed02b08",
"IPY_MODEL_ebe8b2b35e884fd28bb42eacf01ff07c"
],
"layout": "IPY_MODEL_ddb14bc1d5d4437a9ee4a895846e7d29"
}
},
"69ed5fe8ed6046acb4202689c065f858": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_669c24c6309f46cbbdcd0c764143e74f",
"placeholder": "",
"style": "IPY_MODEL_d1e4665beafa4bbeb25d0e9e8447a5a9",
"value": " 89%"
}
},
"bbd9cf7a77aa48fda3a648583ed02b08": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "danger",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_09217bdc1e2145eb84cc97207595e6f0",
"max": 9,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_b294af01ac5f483dacbe2e1c40fdf223",
"value": 8
}
},
"ebe8b2b35e884fd28bb42eacf01ff07c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_c1e27e9184204d618ce59b97f7302335",
"placeholder": "",
"style": "IPY_MODEL_2e63f2af443d448aaaddf81127def048",
"value": " 8/9 [00:00<00:00, 9.96ba/s]"
}
},
"ddb14bc1d5d4437a9ee4a895846e7d29": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"669c24c6309f46cbbdcd0c764143e74f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d1e4665beafa4bbeb25d0e9e8447a5a9": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"09217bdc1e2145eb84cc97207595e6f0": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b294af01ac5f483dacbe2e1c40fdf223": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"c1e27e9184204d618ce59b97f7302335": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"2e63f2af443d448aaaddf81127def048": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"9239cc2fd1d94d86986b7f395de70fca": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_f1e8d31b67db4089ab1b036bda341617",
"IPY_MODEL_33be40ebcab54ff68855f1145cf5e1d6",
"IPY_MODEL_d96c111f09d74a0c9816328f88d9e45b"
],
"layout": "IPY_MODEL_3907dc2aaa484877aee9beab8a6888d4"
}
},
"f1e8d31b67db4089ab1b036bda341617": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_aaacfb0f3bd1427ea44ec84c28a2aaf7",
"placeholder": "",
"style": "IPY_MODEL_cb92e843491142e8a2a4008223a90d02",
"value": " 67%"
}
},
"33be40ebcab54ff68855f1145cf5e1d6": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "danger",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_52169f264141463e94a7761a4ffb3f7a",
"max": 3,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_e509b790873740b59aa2f52875ca2038",
"value": 2
}
},
"d96c111f09d74a0c9816328f88d9e45b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_a395318bce7348d78ca83a308552f042",
"placeholder": "",
"style": "IPY_MODEL_a2bb171f700743559e1d2c472c8289ef",
"value": " 2/3 [00:00<00:00, 8.13ba/s]"
}
},
"3907dc2aaa484877aee9beab8a6888d4": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"aaacfb0f3bd1427ea44ec84c28a2aaf7": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"cb92e843491142e8a2a4008223a90d02": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"52169f264141463e94a7761a4ffb3f7a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e509b790873740b59aa2f52875ca2038": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"a395318bce7348d78ca83a308552f042": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a2bb171f700743559e1d2c472c8289ef": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"f0fc94c6df4c432f9e1edcfceaf44edd": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_03f4216f904d4bf6a8e461c50f40378f",
"IPY_MODEL_76fecb01ad6441a08ad0ddb989a8ab80",
"IPY_MODEL_783edab3150d40a3bf99ed910cdbaf88"
],
"layout": "IPY_MODEL_e33897e8fa4841308c55b19352876ab1"
}
},
"03f4216f904d4bf6a8e461c50f40378f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_909d7d349769402a8c20b02b35eafbd9",
"placeholder": "",
"style": "IPY_MODEL_6139e3d551044671a79e15e8adf351ab",
"value": "Downloading: 100%"
}
},
"76fecb01ad6441a08ad0ddb989a8ab80": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_1c19ec799ccb4e788f34ed8ac37d495f",
"max": 151,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_7a49ac5f18f64b41825378184f8c32ec",
"value": 151
}
},
"783edab3150d40a3bf99ed910cdbaf88": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_76ad2415389f41deb4f2810bca56b753",
"placeholder": "",
"style": "IPY_MODEL_8c4bb02e55fa48429c8f86dac5cb369e",
"value": " 151/151 [00:00<00:00, 5.83kB/s]"
}
},
"e33897e8fa4841308c55b19352876ab1": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"909d7d349769402a8c20b02b35eafbd9": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6139e3d551044671a79e15e8adf351ab": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"1c19ec799ccb4e788f34ed8ac37d495f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"7a49ac5f18f64b41825378184f8c32ec": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"76ad2415389f41deb4f2810bca56b753": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"8c4bb02e55fa48429c8f86dac5cb369e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"980374f604ec4970b0afa70d108c864b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_baa9e8a9169a45efb8c117fdf4ea45b7",
"IPY_MODEL_aa248b7d4eae4e5d965a7d04144adacc",
"IPY_MODEL_311d8199627e4c4f83d02c167b5755f3"
],
"layout": "IPY_MODEL_a299d926edbb4c51906b1be8f694d074"
}
},
"baa9e8a9169a45efb8c117fdf4ea45b7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_c76ebf5d6c6c46cd9d14591a47725ae1",
"placeholder": "",
"style": "IPY_MODEL_178555439d854d30a01cac053adf9079",
"value": "Downloading: 100%"
}
},
"aa248b7d4eae4e5d965a7d04144adacc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_00c8d81110fe4f4bbcf77be4d20581c3",
"max": 1055,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_43c18ef010014cb395f045dd26497fba",
"value": 1055
}
},
"311d8199627e4c4f83d02c167b5755f3": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_3b83d1f429d34e8e8de6ddff555df02d",
"placeholder": "",
"style": "IPY_MODEL_91a8348ac2194686a9ef075f7d49687d",
"value": " 1.05k/1.05k [00:00<00:00, 26.5kB/s]"
}
},
"a299d926edbb4c51906b1be8f694d074": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"c76ebf5d6c6c46cd9d14591a47725ae1": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"178555439d854d30a01cac053adf9079": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"00c8d81110fe4f4bbcf77be4d20581c3": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"43c18ef010014cb395f045dd26497fba": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"3b83d1f429d34e8e8de6ddff555df02d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"91a8348ac2194686a9ef075f7d49687d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"42dcc74bff5440608a3e9f2fa580cd3c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_616c54d0cd534047b93b215e7baf2ba3",
"IPY_MODEL_846ce38d6fb84279a7419091f2d269b0",
"IPY_MODEL_1e15050772b54e34b014a98b9710c783"
],
"layout": "IPY_MODEL_951f476862ea49619100e202a6e742f6"
}
},
"616c54d0cd534047b93b215e7baf2ba3": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_f6e25547ba664cd59128536944a926fd",
"placeholder": "",
"style": "IPY_MODEL_53185a2afedc41e0a680d5007656b90b",
"value": "Downloading: 100%"
}
},
"846ce38d6fb84279a7419091f2d269b0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_860f8204efae42d6ad2fa4eb9e661810",
"max": 995526,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_e61ec283a9c04cc696d17bbe24ccf460",
"value": 995526
}
},
"1e15050772b54e34b014a98b9710c783": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_15abc268777a4e3cb5c3c7f430745c1d",
"placeholder": "",
"style": "IPY_MODEL_e6c1266e8b074bdfafa0db6208743a07",
"value": " 996k/996k [00:00<00:00, 4.17MB/s]"
}
},
"951f476862ea49619100e202a6e742f6": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f6e25547ba664cd59128536944a926fd": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"53185a2afedc41e0a680d5007656b90b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"860f8204efae42d6ad2fa4eb9e661810": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e61ec283a9c04cc696d17bbe24ccf460": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"15abc268777a4e3cb5c3c7f430745c1d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e6c1266e8b074bdfafa0db6208743a07": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"01a12f499b7942cc90f2032a8f3284e9": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_804bf9b8a2154399a05dd0860f4dfd89",
"IPY_MODEL_66fca4d7946240c3b08ba51fac82f2ae",
"IPY_MODEL_6cefb3be5be9488ca033ed9908c6a8f5"
],
"layout": "IPY_MODEL_2d0f2804db004da0914e4733ce96b749"
}
},
"804bf9b8a2154399a05dd0860f4dfd89": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_3d6b93cde5254ce99f19802b7c1146f4",
"placeholder": "",
"style": "IPY_MODEL_838416bfbee0400299abff324c4825bc",
"value": "Downloading: 100%"
}
},
"66fca4d7946240c3b08ba51fac82f2ae": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_f7584b77d97f4d48aa5b50bae2df49f8",
"max": 112,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_0a5cd97f8a914ff89fd27aed7b38164d",
"value": 112
}
},
"6cefb3be5be9488ca033ed9908c6a8f5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_ad4c308ebd574e909d31b161580b9064",
"placeholder": "",
"style": "IPY_MODEL_74030c98fcb942ed9d9ffc43799113f0",
"value": " 112/112 [00:00<00:00, 4.51kB/s]"
}
},
"2d0f2804db004da0914e4733ce96b749": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"3d6b93cde5254ce99f19802b7c1146f4": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"838416bfbee0400299abff324c4825bc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"f7584b77d97f4d48aa5b50bae2df49f8": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"0a5cd97f8a914ff89fd27aed7b38164d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"ad4c308ebd574e909d31b161580b9064": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"74030c98fcb942ed9d9ffc43799113f0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"84631a71d2ca4ae8a781019ea3ce6da9": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_9666c2ff4f32449ea3cbef076a166836",
"IPY_MODEL_efc4af4547804d9daae235691942e73a",
"IPY_MODEL_c8194cd34f554a789359eab6e7596291"
],
"layout": "IPY_MODEL_54f649d297ec4456be5b5df14497fb93"
}
},
"9666c2ff4f32449ea3cbef076a166836": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_e35bc6f834c4490e85ac2ae25d9e922f",
"placeholder": "",
"style": "IPY_MODEL_b166c61328bb49ea803f0d3a7d515d81",
"value": " 0%"
}
},
"efc4af4547804d9daae235691942e73a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "danger",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_a2aac740ef3b4f3c913b71c82b408c2c",
"max": 1,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_d77e188e43dc4e01b82054f2a6a8e832",
"value": 0
}
},
"c8194cd34f554a789359eab6e7596291": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_266b2c90bb4d41198784f016e996066a",
"placeholder": "",
"style": "IPY_MODEL_cc62b20cee8c4a4b8e24576d1c854fbf",
"value": " 0/1 [00:00<?, ?ba/s]"
}
},
"54f649d297ec4456be5b5df14497fb93": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e35bc6f834c4490e85ac2ae25d9e922f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b166c61328bb49ea803f0d3a7d515d81": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"a2aac740ef3b4f3c913b71c82b408c2c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d77e188e43dc4e01b82054f2a6a8e832": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"266b2c90bb4d41198784f016e996066a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"cc62b20cee8c4a4b8e24576d1c854fbf": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"98bebe04cb254369bb3b6b991d4b2648": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_40080df663cc43749963657150cf632d",
"IPY_MODEL_a04f157a98db4d47b75094b6ef1b0990",
"IPY_MODEL_ba2967950f4c483ea399827046f52963"
],
"layout": "IPY_MODEL_a9e0ad6a141a462fb9bea1c18d447332"
}
},
"40080df663cc43749963657150cf632d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_31fbaf0ffb0845f5800e6fca0353b929",
"placeholder": "",
"style": "IPY_MODEL_4a2c17e757d34547a4a68718ef064073",
"value": " 0%"
}
},
"a04f157a98db4d47b75094b6ef1b0990": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "danger",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_f74d219071ab49479194f1061bf343be",
"max": 1,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_a92097360dba4d5c848b48e345b0028e",
"value": 0
}
},
"ba2967950f4c483ea399827046f52963": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_24a164b22a8f4e00944ef05bcec5d032",
"placeholder": "",
"style": "IPY_MODEL_976ade0b37cd43e2aa5aa272dac2445b",
"value": " 0/1 [00:00<?, ?ba/s]"
}
},
"a9e0ad6a141a462fb9bea1c18d447332": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"31fbaf0ffb0845f5800e6fca0353b929": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"4a2c17e757d34547a4a68718ef064073": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"f74d219071ab49479194f1061bf343be": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a92097360dba4d5c848b48e345b0028e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"24a164b22a8f4e00944ef05bcec5d032": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"976ade0b37cd43e2aa5aa272dac2445b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"21b02caa5dc146b8ac2bd1a282381c7f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_0c01b39e34744f74ae47d0d4e70638ce",
"IPY_MODEL_82e7357418144359abba3548449c0c08",
"IPY_MODEL_2ff5e18b6d684b99a82676dbf3db6d32"
],
"layout": "IPY_MODEL_cd75e771337843d9b55838502bed9a1b"
}
},
"0c01b39e34744f74ae47d0d4e70638ce": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b6a4250c705f4dd8b9f52731cce2a23d",
"placeholder": "",
"style": "IPY_MODEL_4549eb0838864025ac6a0f3da9192818",
"value": "Downloading: 100%"
}
},
"82e7357418144359abba3548449c0c08": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b2d377844c1a4bc09433a94088f5213e",
"max": 709144049,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_fe7e058b9a6944969d83f7e72b398bb1",
"value": 709144049
}
},
"2ff5e18b6d684b99a82676dbf3db6d32": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_47d5ccd1eafe4ea1a3476e06d998bd74",
"placeholder": "",
"style": "IPY_MODEL_5a17027205cb4c2bbe140e1e96e4b495",
"value": " 709M/709M [00:11<00:00, 62.7MB/s]"
}
},
"cd75e771337843d9b55838502bed9a1b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b6a4250c705f4dd8b9f52731cce2a23d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"4549eb0838864025ac6a0f3da9192818": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"b2d377844c1a4bc09433a94088f5213e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"fe7e058b9a6944969d83f7e72b398bb1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"47d5ccd1eafe4ea1a3476e06d998bd74": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"5a17027205cb4c2bbe140e1e96e4b495": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"49642b493a2d4c3592ed010663ef789c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_fcc50d75dfb04fccb26c9b93bf8f1efa",
"IPY_MODEL_dc630459e6564d69833d46b63493a160",
"IPY_MODEL_31d0648af26b4fe797f2cb2ff21336a8"
],
"layout": "IPY_MODEL_90e567bcc88445f695a896af6d8da649"
}
},
"fcc50d75dfb04fccb26c9b93bf8f1efa": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_2e815baaae8940ffb90e2aaf5c6f7e2a",
"placeholder": "",
"style": "IPY_MODEL_32250602bfc140d18859e6b48f9dbfbc",
"value": " 89%"
}
},
"dc630459e6564d69833d46b63493a160": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "danger",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_a85179eeb7d94bba8c79a10a734d8c6b",
"max": 9,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_7346605d4df548afb179d489217990ff",
"value": 8
}
},
"31d0648af26b4fe797f2cb2ff21336a8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_6b1a06b7d6ea43da8b9af7f92a882455",
"placeholder": "",
"style": "IPY_MODEL_a1b4b4cc26ff4c50b4ef5c99d8c7cb3e",
"value": " 8/9 [00:00<00:00, 9.55ba/s]"
}
},
"90e567bcc88445f695a896af6d8da649": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"2e815baaae8940ffb90e2aaf5c6f7e2a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"32250602bfc140d18859e6b48f9dbfbc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"a85179eeb7d94bba8c79a10a734d8c6b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"7346605d4df548afb179d489217990ff": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"6b1a06b7d6ea43da8b9af7f92a882455": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a1b4b4cc26ff4c50b4ef5c99d8c7cb3e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"7b3ce57b6b5e4253b8219adc2c6ff47e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_ee7bc9576ea44b97a6ad9d2ae8adaec7",
"IPY_MODEL_c012f774a21e44188365f5b0646b422e",
"IPY_MODEL_5899086ed64c4d3185374a7f541e22fe"
],
"layout": "IPY_MODEL_3aaf8990b0574a9183b9902eb33670a5"
}
},
"ee7bc9576ea44b97a6ad9d2ae8adaec7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_cc3d6c6b95ab4c80983b3ce2175acb8a",
"placeholder": "",
"style": "IPY_MODEL_873c876ae8f64b9bba4f25efa3a3859a",
"value": " 67%"
}
},
"c012f774a21e44188365f5b0646b422e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "danger",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_d727d1396df2443e9e46ab7e0c7d5276",
"max": 3,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_cda8e993793c4b949910e309a3f50a03",
"value": 2
}
},
"5899086ed64c4d3185374a7f541e22fe": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_5f38c6b988b44678a7b7f06a99daa983",
"placeholder": "",
"style": "IPY_MODEL_7b53cf7c82a6439eb94d5f0635afe2f3",
"value": " 2/3 [00:00<00:00, 7.76ba/s]"
}
},
"3aaf8990b0574a9183b9902eb33670a5": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"cc3d6c6b95ab4c80983b3ce2175acb8a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"873c876ae8f64b9bba4f25efa3a3859a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"d727d1396df2443e9e46ab7e0c7d5276": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"cda8e993793c4b949910e309a3f50a03": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"5f38c6b988b44678a7b7f06a99daa983": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"7b53cf7c82a6439eb94d5f0635afe2f3": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}