{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "138778c4",
"metadata": {
"id": "138778c4"
},
"outputs": [],
"source": [
"from tqdm import tqdm\n",
"import pandas as pd\n",
"from sklearn import metrics\n",
"from scipy.spatial.distance import cdist"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "dTSILRD7hIHG",
"metadata": {
"id": "dTSILRD7hIHG"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: transformers in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (4.24.0)\n",
"Requirement already satisfied: filelock in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from transformers) (3.6.0)\n",
"Requirement already satisfied: packaging>=20.0 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from transformers) (21.3)\n",
"Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from transformers) (0.13.1)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.10.0 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from transformers) (0.10.1)\n",
"Requirement already satisfied: regex!=2019.12.17 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from transformers) (2022.7.25)\n",
"Requirement already satisfied: numpy>=1.17 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from transformers) (1.21.5)\n",
"Requirement already satisfied: pyyaml>=5.1 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from transformers) (6.0)\n",
"Requirement already satisfied: tqdm>=4.27 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from transformers) (4.64.1)\n",
"Requirement already satisfied: requests in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from transformers) (2.27.1)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from huggingface-hub<1.0,>=0.10.0->transformers) (4.1.1)\n",
"Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from packaging>=20.0->transformers) (2.4.7)\n",
"Requirement already satisfied: idna<4,>=2.5 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from requests->transformers) (3.3)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from requests->transformers) (2021.10.8)\n",
"Requirement already satisfied: charset-normalizer~=2.0.0 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from requests->transformers) (2.0.4)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from requests->transformers) (1.26.9)\n"
]
}
],
"source": [
"!pip install transformers"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "70512d4e",
"metadata": {
"id": "70512d4e"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: parallelformers in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (1.2.7)\n",
"Requirement already satisfied: dacite in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from parallelformers) (1.6.0)\n",
"Requirement already satisfied: torch in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from parallelformers) (1.11.0)\n",
"Requirement already satisfied: transformers>=4.2 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from parallelformers) (4.24.0)\n",
"Requirement already satisfied: requests in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from transformers>=4.2->parallelformers) (2.27.1)\n",
"Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from transformers>=4.2->parallelformers) (0.13.1)\n",
"Requirement already satisfied: regex!=2019.12.17 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from transformers>=4.2->parallelformers) (2022.7.25)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.10.0 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from transformers>=4.2->parallelformers) (0.10.1)\n",
"Requirement already satisfied: tqdm>=4.27 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from transformers>=4.2->parallelformers) (4.64.1)\n",
"Requirement already satisfied: pyyaml>=5.1 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from transformers>=4.2->parallelformers) (6.0)\n",
"Requirement already satisfied: packaging>=20.0 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from transformers>=4.2->parallelformers) (21.3)\n",
"Requirement already satisfied: filelock in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from transformers>=4.2->parallelformers) (3.6.0)\n",
"Requirement already satisfied: numpy>=1.17 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from transformers>=4.2->parallelformers) (1.21.5)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from huggingface-hub<1.0,>=0.10.0->transformers>=4.2->parallelformers) (4.1.1)\n",
"Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from packaging>=20.0->transformers>=4.2->parallelformers) (2.4.7)\n",
"Requirement already satisfied: charset-normalizer~=2.0.0 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from requests->transformers>=4.2->parallelformers) (2.0.4)\n",
"Requirement already satisfied: idna<4,>=2.5 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from requests->transformers>=4.2->parallelformers) (3.3)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from requests->transformers>=4.2->parallelformers) (1.26.9)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (from requests->transformers>=4.2->parallelformers) (2021.10.8)\n"
]
}
],
"source": [
"!pip install parallelformers"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "6b2ace9d",
"metadata": {
"id": "6b2ace9d"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: faiss-gpu in /home2/sashank.sridhar/miniconda3/envs/TripletLoss/lib/python3.9/site-packages (1.7.2)\r\n"
]
}
],
"source": [
"!pip install faiss-gpu"
]
},
{
"cell_type": "markdown",
"id": "5a0d2481",
"metadata": {
"id": "5a0d2481"
},
"source": [
"Download snomed term-concept file from UMLS website"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ea498c9d",
"metadata": {
"id": "ea498c9d"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1569232\n"
]
}
],
"source": [
"snomed_csv = pd.read_csv(\"sct2_Description_Snapshot-en_INT_20220831.txt\", delimiter=\"\\t\")\n",
"print(len(snomed_csv))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "DkqmgM5uxfob",
"metadata": {
"id": "DkqmgM5uxfob"
},
"outputs": [],
"source": [
"# from google.colab import drive\n",
"# drive.mount('/content/drive')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "d811cd3c",
"metadata": {
"id": "d811cd3c"
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['id', 'effectiveTime', 'active', 'moduleId', 'conceptId',\n",
" 'languageCode', 'typeId', 'term', 'caseSignificanceId'],\n",
" dtype='object')"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"snomed_csv.columns"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "3327f0d9",
"metadata": {
"id": "3327f0d9"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>effectiveTime</th>\n",
" <th>active</th>\n",
" <th>moduleId</th>\n",
" <th>conceptId</th>\n",
" <th>languageCode</th>\n",
" <th>typeId</th>\n",
" <th>term</th>\n",
" <th>caseSignificanceId</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>101013</td>\n",
" <td>20170731</td>\n",
" <td>1</td>\n",
" <td>900000000000207008</td>\n",
" <td>126813005</td>\n",
" <td>en</td>\n",
" <td>900000000000013009</td>\n",
" <td>Neoplasm of anterior aspect of epiglottis</td>\n",
" <td>900000000000448009</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>102018</td>\n",
" <td>20170731</td>\n",
" <td>1</td>\n",
" <td>900000000000207008</td>\n",
" <td>126814004</td>\n",
" <td>en</td>\n",
" <td>900000000000013009</td>\n",
" <td>Neoplasm of junctional region of epiglottis</td>\n",
" <td>900000000000448009</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>103011</td>\n",
" <td>20170731</td>\n",
" <td>1</td>\n",
" <td>900000000000207008</td>\n",
" <td>126815003</td>\n",
" <td>en</td>\n",
" <td>900000000000013009</td>\n",
" <td>Neoplasm of lateral wall of oropharynx</td>\n",
" <td>900000000000448009</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>104017</td>\n",
" <td>20170731</td>\n",
" <td>1</td>\n",
" <td>900000000000207008</td>\n",
" <td>126816002</td>\n",
" <td>en</td>\n",
" <td>900000000000013009</td>\n",
" <td>Neoplasm of posterior wall of oropharynx</td>\n",
" <td>900000000000448009</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>105016</td>\n",
" <td>20170731</td>\n",
" <td>1</td>\n",
" <td>900000000000207008</td>\n",
" <td>126817006</td>\n",
" <td>en</td>\n",
" <td>900000000000013009</td>\n",
" <td>Neoplasm of esophagus</td>\n",
" <td>900000000000448009</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id effectiveTime active moduleId conceptId languageCode \\\n",
"0 101013 20170731 1 900000000000207008 126813005 en \n",
"1 102018 20170731 1 900000000000207008 126814004 en \n",
"2 103011 20170731 1 900000000000207008 126815003 en \n",
"3 104017 20170731 1 900000000000207008 126816002 en \n",
"4 105016 20170731 1 900000000000207008 126817006 en \n",
"\n",
" typeId term \\\n",
"0 900000000000013009 Neoplasm of anterior aspect of epiglottis \n",
"1 900000000000013009 Neoplasm of junctional region of epiglottis \n",
"2 900000000000013009 Neoplasm of lateral wall of oropharynx \n",
"3 900000000000013009 Neoplasm of posterior wall of oropharynx \n",
"4 900000000000013009 Neoplasm of esophagus \n",
"\n",
" caseSignificanceId \n",
"0 900000000000448009 \n",
"1 900000000000448009 \n",
"2 900000000000448009 \n",
"3 900000000000448009 \n",
"4 900000000000448009 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"snomed_csv.head()"
]
},
{
"cell_type": "markdown",
"id": "eee36071",
"metadata": {
"id": "eee36071"
},
"source": [
"Process snomed terms"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "fc74afa8",
"metadata": {
"id": "fc74afa8"
},
"outputs": [],
"source": [
"all_ids = snomed_csv['conceptId']\n",
"all_names = []\n",
"for i in snomed_csv['term']:\n",
" try:\n",
" all_names.append(i.lower())\n",
" except:\n",
" all_names.append('not applicable')\n",
"# print(i)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "ecbc8292",
"metadata": {
"id": "ecbc8292"
},
"outputs": [
{
"data": {
"text/plain": [
"id 1491117014\n",
"effectiveTime 20030131\n",
"active 1\n",
"moduleId 900000000000207008\n",
"conceptId 385432009\n",
"languageCode en\n",
"typeId 900000000000013009\n",
"term NaN\n",
"caseSignificanceId 900000000000020002\n",
"Name: 906846, dtype: object"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"snomed_csv.iloc[906846]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "6d11f0d6",
"metadata": {
"id": "6d11f0d6"
},
"outputs": [],
"source": [
"snomed_name_id = [(all_names[i], all_ids[i]) for i in range(len(all_ids))]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "f61e031c",
"metadata": {
"id": "f61e031c"
},
"outputs": [
{
"data": {
"text/plain": [
"1569232"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(all_ids)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "8b2c1e53",
"metadata": {
"id": "8b2c1e53"
},
"outputs": [
{
"data": {
"text/plain": [
"['neoplasm of anterior aspect of epiglottis',\n",
" 'neoplasm of junctional region of epiglottis',\n",
" 'neoplasm of lateral wall of oropharynx',\n",
" 'neoplasm of posterior wall of oropharynx',\n",
" 'neoplasm of esophagus',\n",
" 'neoplasm of cervical esophagus',\n",
" 'neoplasm of thoracic esophagus',\n",
" 'neoplasm of abdominal esophagus',\n",
" 'neoplasm of middle third of esophagus',\n",
" 'neoplasm of lower third of esophagus']"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"all_names[:10]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "4de928c7",
"metadata": {
"id": "4de928c7"
},
"outputs": [
{
"data": {
"text/plain": [
"0 126813005\n",
"1 126814004\n",
"2 126815003\n",
"3 126816002\n",
"4 126817006\n",
"5 126818001\n",
"6 126819009\n",
"7 126820003\n",
"8 126822006\n",
"9 126823001\n",
"Name: conceptId, dtype: int64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"all_ids[:10]"
]
},
{
"cell_type": "markdown",
"id": "0b808263",
"metadata": {
"id": "0b808263"
},
"source": [
"# load pubmedbert"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "a7c7ac5b",
"metadata": {
"id": "a7c7ac5b"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import torch"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "d2c96dea",
"metadata": {
"id": "d2c96dea"
},
"outputs": [],
"source": [
"GPU_COUNT = torch.cuda.device_count()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "5c3cfade",
"metadata": {
"id": "5c3cfade"
},
"outputs": [
{
"data": {
"text/plain": [
"4"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"GPU_COUNT"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "7bd1e1f2",
"metadata": {
"id": "7bd1e1f2"
},
"outputs": [
{
"data": {
"text/plain": [
"device(type='cuda')"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\") ## specify the GPU id's, GPU id's start from 0.\n",
"device"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "528023ac",
"metadata": {
"id": "528023ac"
},
"outputs": [],
"source": [
"# from transformers import AutoTokenizer, AutoModel\n",
"# tokenizer = AutoTokenizer.from_pretrained(\"cambridgeltl/SapBERT-from-PubMedBERT-fulltext\")\n",
"# model = AutoModel.from_pretrained(\"cambridgeltl/SapBERT-from-PubMedBERT-fulltext\")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "Lhh12FnfuwMq",
"metadata": {
"id": "Lhh12FnfuwMq"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2022-11-15 11:09:16.635924: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2022-11-15 11:09:16.898184: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
"2022-11-15 11:09:18.830720: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-10.2/lib64:/opt/cudnn-7.6.5.32-cuda-10.2/lib64\n",
"2022-11-15 11:09:18.830877: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-10.2/lib64:/opt/cudnn-7.6.5.32-cuda-10.2/lib64\n",
"2022-11-15 11:09:18.830892: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6f2fe6e76459468cb502634e09fadc37",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading: 0%| | 0.00/28.0 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f889047f747d45ecb0602cbe37b891ea",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading: 0%| | 0.00/385 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "dfd44f0b70de46a49826a8cd74c184da",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading: 0%| | 0.00/225k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4c9a209544b54c7691798d76c563af26",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading: 0%| | 0.00/440M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight']\n",
"- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
]
}
],
"source": [
"from transformers import AutoTokenizer, AutoModel\n",
"tokenizer = AutoTokenizer.from_pretrained(\"microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract\")\n",
"model = AutoModel.from_pretrained(\"microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract\")\n",
"# model = AutoModelForMaskedLM.from_pretrained(\"microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract\")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "tlzJasirUq6Y",
"metadata": {
"id": "tlzJasirUq6Y"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2022-11-15 11:10:10.057137: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2022-11-15 11:10:10.057137: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2022-11-15 11:10:10.057137: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2022-11-15 11:10:10.071504: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2022-11-15 11:10:10.318816: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
"2022-11-15 11:10:10.320732: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
"2022-11-15 11:10:10.334204: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
"2022-11-15 11:10:10.341789: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
"2022-11-15 11:10:12.013349: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-10.2/lib64:/opt/cudnn-7.6.5.32-cuda-10.2/lib64\n",
"2022-11-15 11:10:12.013347: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-10.2/lib64:/opt/cudnn-7.6.5.32-cuda-10.2/lib64\n",
"2022-11-15 11:10:12.013377: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-10.2/lib64:/opt/cudnn-7.6.5.32-cuda-10.2/lib64\n",
"2022-11-15 11:10:12.013433: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-10.2/lib64:/opt/cudnn-7.6.5.32-cuda-10.2/lib64\n",
"2022-11-15 11:10:12.013604: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-10.2/lib64:/opt/cudnn-7.6.5.32-cuda-10.2/lib64\n",
"2022-11-15 11:10:12.013609: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-10.2/lib64:/opt/cudnn-7.6.5.32-cuda-10.2/lib64\n",
"2022-11-15 11:10:12.013630: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"2022-11-15 11:10:12.013636: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"2022-11-15 11:10:12.013659: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-10.2/lib64:/opt/cudnn-7.6.5.32-cuda-10.2/lib64\n",
"2022-11-15 11:10:12.013679: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-10.2/lib64:/opt/cudnn-7.6.5.32-cuda-10.2/lib64\n",
"2022-11-15 11:10:12.013705: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"2022-11-15 11:10:12.013700: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n"
]
},
{
"data": {
"text/plain": [
"<parallelformers.parallelize.parallelize at 0x149786bd6a60>"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# model = torch.nn.DataParallel(model)\n",
"# model.to(device)\n",
"from parallelformers import parallelize\n",
"parallelize(model, num_gpus=4, fp16=True)"
]
},
{
"cell_type": "markdown",
"id": "a3a24048",
"metadata": {
"id": "a3a24048"
},
"source": [
"Generate embeddings for snomed labels"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bb0b8655",
"metadata": {
"id": "bb0b8655"
},
"outputs": [],
"source": [
"# all_names1 = all_names[:100]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "5c5ff31c",
"metadata": {
"id": "5c5ff31c"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|█████████████████████████████████████| 12260/12260 [13:26<00:00, 15.20it/s]\n"
]
}
],
"source": [
"bs = 128\n",
"all_reps = []\n",
"for i in tqdm(np.arange(0, len(all_names), bs)):\n",
" toks = tokenizer.batch_encode_plus(all_names[i:i+bs],\n",
" padding=\"max_length\",\n",
" max_length=25,\n",
" truncation=True,\n",
" return_tensors=\"pt\")\n",
" toks = toks.to(device)\n",
" output = model(**toks)\n",
" cls_rep = output[0][:,0,:]\n",
" \n",
" all_reps.append(cls_rep.cpu().detach().numpy())\n",
"all_reps_emb = np.concatenate(all_reps, axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "c1230654",
"metadata": {
"id": "c1230654"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(1569232, 768)\n"
]
}
],
"source": [
"print(all_reps_emb.shape)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "00a427c1",
"metadata": {
"id": "00a427c1"
},
"outputs": [],
"source": [
"all_reps_emb = all_reps_emb.astype(np.float32)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "3c884582",
"metadata": {
"id": "3c884582"
},
"outputs": [],
"source": [
"import faiss"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "9d7d069d",
"metadata": {
"id": "9d7d069d"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n"
]
}
],
"source": [
"d = all_reps_emb.shape[1]\n",
"index = faiss.IndexFlatL2(d) # build the index\n",
"print(index.is_trained)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "77b258e0",
"metadata": {
"id": "77b258e0"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1569232\n"
]
}
],
"source": [
"index.add(all_reps_emb) # add vectors to the index\n",
"print(index.ntotal)"
]
},
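  {
   "cell_type": "markdown",
   "id": "faiss-gpu-note",
   "metadata": {},
   "source": [
    "Optional: since `faiss-gpu` is installed above, the same exact flat index could be replicated onto the available GPUs for faster nearest-neighbor search. A minimal sketch (not used for the results below), assuming the `index` and `all_reps_emb` defined above:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "faiss-gpu-sketch",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional sketch (not used for the results below): replicate the exact flat\n",
    "# index onto all visible GPUs so that nearest-neighbor search runs on GPU.\n",
    "gpu_index = faiss.index_cpu_to_all_gpus(index)\n",
    "\n",
    "# sanity check: each stored vector's nearest neighbor should be itself\n",
    "_, I_check = gpu_index.search(all_reps_emb[:5], 1)\n",
    "print(I_check.ravel())"
   ]
  },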
{
"cell_type": "markdown",
"id": "40fe39a4",
"metadata": {
"id": "40fe39a4"
},
"source": [
"Load ground truth data"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "44851e30",
"metadata": {
"id": "44851e30"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>filename</th>\n",
" <th>mark</th>\n",
" <th>label</th>\n",
" <th>offset1</th>\n",
" <th>offset2</th>\n",
" <th>span</th>\n",
" <th>code</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>es-S0212-71992007000100007-1</td>\n",
" <td>T1</td>\n",
" <td>ENFERMEDAD</td>\n",
" <td>40</td>\n",
" <td>61</td>\n",
" <td>arterial hypertension</td>\n",
" <td>38341003</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>es-S0212-71992007000100007-1</td>\n",
" <td>T2</td>\n",
" <td>ENFERMEDAD</td>\n",
" <td>66</td>\n",
" <td>79</td>\n",
" <td>polyarthrosis</td>\n",
" <td>36186002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>es-S0212-71992007000100007-1</td>\n",
" <td>T3</td>\n",
" <td>ENFERMEDAD</td>\n",
" <td>1682</td>\n",
" <td>1698</td>\n",
" <td>pleural effusion</td>\n",
" <td>60046008</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>es-S0212-71992007000100007-1</td>\n",
" <td>T4</td>\n",
" <td>ENFERMEDAD</td>\n",
" <td>1859</td>\n",
" <td>1875</td>\n",
" <td>pleural effusion</td>\n",
" <td>60046008</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>es-S0212-71992007000100007-1</td>\n",
" <td>T5</td>\n",
" <td>ENFERMEDAD</td>\n",
" <td>1626</td>\n",
" <td>1648</td>\n",
" <td>lower lobe atelectasis</td>\n",
" <td>46621007</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" filename mark label offset1 offset2 \\\n",
"0 es-S0212-71992007000100007-1 T1 ENFERMEDAD 40 61 \n",
"1 es-S0212-71992007000100007-1 T2 ENFERMEDAD 66 79 \n",
"2 es-S0212-71992007000100007-1 T3 ENFERMEDAD 1682 1698 \n",
"3 es-S0212-71992007000100007-1 T4 ENFERMEDAD 1859 1875 \n",
"4 es-S0212-71992007000100007-1 T5 ENFERMEDAD 1626 1648 \n",
"\n",
" span code \n",
"0 arterial hypertension 38341003 \n",
"1 polyarthrosis 36186002 \n",
"2 pleural effusion 60046008 \n",
"3 pleural effusion 60046008 \n",
"4 lower lobe atelectasis 46621007 "
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"entities = pd.read_csv(\"entities.tsv\", delimiter=\"\\t\")\n",
"entities.head()"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "8a009c68",
"metadata": {
"id": "8a009c68",
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['arterial hypertension', 'polyarthrosis', 'pleural effusion', 'pleural effusion', 'lower lobe atelectasis', 'infectious spondylodiscitis d10-d11', 'pleural effusion', 'brucellosis', 'orchiepididymitis', 'goitre']\n",
"0 38341003\n",
"1 36186002\n",
"2 60046008\n",
"3 60046008\n",
"4 46621007\n",
"5 302935008\n",
"6 60046008\n",
"7 75702008\n",
"8 197983000\n",
"9 3716002\n",
"Name: code, dtype: object\n"
]
}
],
"source": [
"inp_names = [i.lower() for i in entities['span']]\n",
"inp_labels = entities['code']\n",
"print(inp_names[:10])\n",
"print(inp_labels[:10])"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "90bbf268",
"metadata": {
"id": "90bbf268"
},
"outputs": [],
"source": [
"# c=0\n",
"# for i in inp_label:\n",
"# # if type(i)!=float:\n",
"# try:\n",
"# [float(i)]\n",
"# except:\n",
"# c+=1\n",
"# # print(i.split('+'))\n",
"# c"
]
},
{
"cell_type": "code",
"execution_count": 90,
"id": "49562d03",
"metadata": {
"id": "49562d03"
},
"outputs": [],
"source": [
"# inp_names1 = inp_names[:10]"
]
},
{
"cell_type": "markdown",
"id": "e6cf5d29",
"metadata": {
"id": "e6cf5d29"
},
"source": [
"Generate embeddings for ground truth terms, get their closest snomedct embedding and list out its corresponding snomedct code"
]
},
{
"cell_type": "code",
"execution_count": 91,
"id": "049818b3",
"metadata": {
"id": "049818b3"
},
"outputs": [],
"source": [
"query_toks = tokenizer.batch_encode_plus(list(inp_names),\n",
" padding = \"max_length\",\n",
" max_length = 25,\n",
" truncation=True,\n",
" return_tensors=\"pt\")\n",
"query_toks = query_toks.to(device)\n",
"query_output = model(**query_toks)\n",
"query_cls_rep = query_output[0][:,0,:]"
]
},
{
"cell_type": "code",
"execution_count": 92,
"id": "f0ab19b8",
"metadata": {
"id": "f0ab19b8"
},
"outputs": [],
"source": [
"query_cls_rep = query_cls_rep.cpu().detach().numpy()"
]
},
{
"cell_type": "code",
"execution_count": 93,
"id": "3d90a519",
"metadata": {
"id": "3d90a519"
},
"outputs": [],
"source": [
"query_cls_rep = query_cls_rep.astype(np.float32)"
]
},
{
"cell_type": "code",
"execution_count": 94,
"id": "184cd570",
"metadata": {
"id": "184cd570"
},
"outputs": [],
"source": [
"k= 1 # take the 1 closest neighbor"
]
},
{
"cell_type": "code",
"execution_count": 95,
"id": "ac0965a5",
"metadata": {
"id": "ac0965a5"
},
"outputs": [],
"source": [
"D, I = index.search(query_cls_rep, k)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4cfb69ca",
"metadata": {
"id": "4cfb69ca"
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 96,
"id": "2145e65a",
"metadata": {
"id": "2145e65a"
},
"outputs": [],
"source": [
"pred_ids = [all_ids[i[0]] for i in I]\n",
"# score=sum((pred_ids[i]==inp_label[i])*1 for i in range(len(pred_ids)))\n",
"# score/len(inp_label)"
]
},
{
"cell_type": "markdown",
"id": "85c1243b",
"metadata": {
"id": "85c1243b"
},
"source": [
"In ground truth, zero or more than one codes are also present for each term; here only one code is predicted for each query"
]
},
{
"cell_type": "code",
"execution_count": 97,
"id": "d7476a77",
"metadata": {
"id": "d7476a77"
},
"outputs": [],
"source": [
"p = [[i] for i in pred_ids]\n",
"t = []\n",
"for i in inp_labels:\n",
" try:\n",
" t.append([int(i)])\n",
" except:\n",
" try:\n",
" t.append([int(j) for j in (i.split('+'))])\n",
" except:\n",
"# print('nomap')\n",
" t.append([])\n"
]
},
{
"cell_type": "code",
"execution_count": 98,
"id": "5a676132",
"metadata": {
"id": "5a676132"
},
"outputs": [
{
"data": {
"text/plain": [
"48146000"
]
},
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"p[0][0]"
]
},
{
"cell_type": "code",
"execution_count": 99,
"id": "424b2281",
"metadata": {
"id": "424b2281"
},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"p[0][0] in t[0]"
]
},
{
"cell_type": "code",
"execution_count": 100,
"id": "ae535967",
"metadata": {
"id": "ae535967"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"precision 0.293984962406015\n",
"recall 0.2872887582659809\n",
"f1 0.290598231001568\n"
]
}
],
"source": [
"pre = 0\n",
"for i in range(len(p)):\n",
" if p[i][0] in t[i]:\n",
" pre+=1\n",
"\n",
"pre /= len(p)\n",
"print('precision', pre)\n",
"\n",
"\n",
"rec = 0\n",
"for i in range(len(t)):\n",
" if len(t[i])==1:\n",
" if t[i][0] in p[i]:\n",
" rec+=1\n",
" elif len(t[i])>1:\n",
" for j in range(len(t[i])):\n",
" if t[i][j] in p[i]:\n",
" rec+=1\n",
"\n",
"rec /= sum(len(i) for i in t)\n",
"print('recall', rec) \n",
"\n",
"\n",
"f1 = 2*pre*rec/(pre+rec+np.finfo(np.float32).eps)\n",
"print('f1', f1)"
]
},
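  {
   "cell_type": "markdown",
   "id": "topk-coverage-note",
   "metadata": {},
   "source": [
    "A possible extension (not part of the evaluation above): only the single nearest neighbor is used, so widening the search to the top-k neighbors and counting a query as covered when any retrieved concept code matches one of its gold codes gives a rough sense of the headroom left for re-ranking. A minimal sketch, assuming the `index`, `query_cls_rep`, `all_ids`, and gold code lists `t` defined above; `k_extra` is an illustrative value:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "topk-coverage-sketch",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sketch only: hit@k-style coverage of the top-k retrieved codes; not used for the scores above.\n",
    "k_extra = 5  # illustrative value, not tuned\n",
    "_, I_k = index.search(query_cls_rep, k_extra)\n",
    "\n",
    "hits = 0\n",
    "for i, neighbors in enumerate(I_k):\n",
    "    retrieved_codes = {int(all_ids[j]) for j in neighbors}\n",
    "    if retrieved_codes & set(t[i]):  # any retrieved code matches a gold code\n",
    "        hits += 1\n",
    "\n",
    "print('fraction of queries with a correct code in the top', k_extra, ':', hits / len(t))"
   ]
  },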
{
"cell_type": "code",
"execution_count": null,
"id": "5e963520",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"collapsed_sections": [],
"machine_shape": "hm",
"provenance": [
{
"file_id": "14SK4V1zyuaUOFhPIxRTzTnuMj0WTBpqR",
"timestamp": 1668422445307
}
]
},
"gpuClass": "premium",
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}