NaBot / Git / Diff of /development/qa-server/finetune

Models:
philipB/
NaBot
Downloads: 1
Diff of /development/qa-server/finetune_qa.ipynb [000000] .. [507a54]
Switch to side-by-side view

--- a
+++ b/development/qa-server/finetune_qa.ipynb
@@ -0,0 +1,1047 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "finetune_qa.ipynb",
+      "provenance": [],
+      "collapsed_sections": [],
+      "machine_shape": "hm"
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "accelerator": "TPU"
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "GEqISDRm-Pi4",
+        "outputId": "e0004e89-f57d-4fb3-d1f3-4cf147f7d654"
+      },
+      "source": [
+        "!nvidia-smi"
+      ],
+      "execution_count": 1,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.\n",
+            "\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "_mL0kbn367e3",
+        "outputId": "72a8c353-75da-4ae4-9bc4-07e320dc5c74"
+      },
+      "source": [
+        "!free -h"
+      ],
+      "execution_count": 2,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "              total        used        free      shared  buff/cache   available\n",
+            "Mem:            35G        901M         32G        1.1M        2.1G         33G\n",
+            "Swap:            0B          0B          0B\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "GhyCgY-yDz39",
+        "outputId": "78c5fda8-61c9-4c26-997e-92fe2c431ada"
+      },
+      "source": [
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')"
+      ],
+      "execution_count": 3,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Mounted at /content/drive\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "OIaU-LbzD_0j",
+        "outputId": "8ce0dc86-8fe8-4b21-c119-58954245b4b4"
+      },
+      "source": [
+        "%cd /content/drive/MyDrive"
+      ],
+      "execution_count": 4,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "/content/drive/MyDrive\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "KQZyI4jJL-md",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "8cb98a59-7ff5-4c55-8c7b-1b01987c66c4"
+      },
+      "source": [
+        "!wget -c https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json  https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json"
+      ],
+      "execution_count": 6,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "--2021-08-30 00:04:12--  https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json\n",
+            "Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.108.153, 185.199.109.153, 185.199.110.153, ...\n",
+            "Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.108.153|:443... connected.\n",
+            "HTTP request sent, awaiting response... 416 Range Not Satisfiable\n",
+            "\n",
+            "    The file is already fully retrieved; nothing to do.\n",
+            "\n",
+            "--2021-08-30 00:04:12--  https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json\n",
+            "Reusing existing connection to rajpurkar.github.io:443.\n",
+            "HTTP request sent, awaiting response... 416 Range Not Satisfiable\n",
+            "\n",
+            "    The file is already fully retrieved; nothing to do.\n",
+            "\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "l_E2IOfvixAK",
+        "outputId": "041f8a5e-783c-4f51-a0c8-a2dd6cfa8628"
+      },
+      "source": [
+        "!wget -c https://worksheets.codalab.org/rest/bundles/0x6b567e1cf2e041ec80d7098f031c5c9e/contents/blob/ -O evaluate-v2.0.py"
+      ],
+      "execution_count": 115,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "--2021-08-30 00:53:05--  https://worksheets.codalab.org/rest/bundles/0x6b567e1cf2e041ec80d7098f031c5c9e/contents/blob/\n",
+            "Resolving worksheets.codalab.org (worksheets.codalab.org)... 13.68.212.115\n",
+            "Connecting to worksheets.codalab.org (worksheets.codalab.org)|13.68.212.115|:443... connected.\n",
+            "HTTP request sent, awaiting response... 200 OK\n",
+            "Syntax error in Set-Cookie: codalab_session=\"\"; expires=Thu, 01 Jan 1970 00:00:00 GMT; Max-Age=-1; Path=/ at position 70.\n",
+            "Length: unspecified [text/x-python]\n",
+            "Saving to: ‘evaluate-v2.0.py’\n",
+            "\n",
+            "evaluate-v2.0.py        [ <=>                ]  10.30K  --.-KB/s    in 0.001s  \n",
+            "\n",
+            "2021-08-30 00:53:05 (7.31 MB/s) - ‘evaluate-v2.0.py’ saved [10547]\n",
+            "\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "0fTz8b6R7Seb",
+        "outputId": "36fc6527-c9d4-493d-d3b5-9bdca70c3fcc"
+      },
+      "source": [
+        "!git clone https://github.com/huggingface/transformers"
+      ],
+      "execution_count": 7,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "fatal: destination path 'transformers' already exists and is not an empty directory.\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "JRnXur0H7eir",
+        "outputId": "617279fe-4dbf-453b-8392-e104b78fb72a"
+      },
+      "source": [
+        "!pip install transformers[sentencepiece]"
+      ],
+      "execution_count": 19,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Requirement already satisfied: transformers[sentencepiece] in /usr/local/lib/python3.7/dist-packages (4.9.2)\n",
+            "Requirement already satisfied: sacremoses in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (0.0.45)\n",
+            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (1.19.5)\n",
+            "Requirement already satisfied: huggingface-hub==0.0.12 in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (0.0.12)\n",
+            "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (2.23.0)\n",
+            "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (21.0)\n",
+            "Requirement already satisfied: tokenizers<0.11,>=0.10.1 in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (0.10.3)\n",
+            "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (4.6.4)\n",
+            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (2019.12.20)\n",
+            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (5.4.1)\n",
+            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (4.62.0)\n",
+            "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (3.0.12)\n",
+            "Requirement already satisfied: protobuf in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (3.17.3)\n",
+            "Requirement already satisfied: sentencepiece==0.1.91 in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (0.1.91)\n",
+            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from huggingface-hub==0.0.12->transformers[sentencepiece]) (3.7.4.3)\n",
+            "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->transformers[sentencepiece]) (2.4.7)\n",
+            "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers[sentencepiece]) (3.5.0)\n",
+            "Requirement already satisfied: six>=1.9 in /usr/local/lib/python3.7/dist-packages (from protobuf->transformers[sentencepiece]) (1.15.0)\n",
+            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers[sentencepiece]) (2021.5.30)\n",
+            "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers[sentencepiece]) (3.0.4)\n",
+            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers[sentencepiece]) (2.10)\n",
+            "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers[sentencepiece]) (1.24.3)\n",
+            "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers[sentencepiece]) (1.0.1)\n",
+            "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers[sentencepiece]) (7.1.2)\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "6e5aDRYS7hFj",
+        "outputId": "472364b4-5e81-4d70-e20e-592d211bf4ba"
+      },
+      "source": [
+        "!ls"
+      ],
+      "execution_count": 9,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            " cache_dir\t    dev-v2.0.json   train-v2.0.json\n",
+            "'Colab Notebooks'   index.html\t    transformers\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "rmFH8vUE7nJ-",
+        "outputId": "5400bce0-4a15-4df5-dc24-c07e5f41a3f3"
+      },
+      "source": [
+        "%cd transformers/examples/legacy/question-answering/"
+      ],
+      "execution_count": 10,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "/content/drive/MyDrive/transformers/examples/legacy/question-answering\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "SZ86XT1x9EnA",
+        "outputId": "394542a3-8946-4909-afdd-c29bef7d644f"
+      },
+      "source": [
+        "!ls ../../../.."
+      ],
+      "execution_count": 11,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            " cache_dir\t    dev-v2.0.json   train-v2.0.json\n",
+            "'Colab Notebooks'   index.html\t    transformers\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "4lb51-W9FNZg",
+        "outputId": "bd384d0f-7bfd-4339-8abc-8da2cd2a7749"
+      },
+      "source": [
+        "!ls ."
+      ],
+      "execution_count": 12,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "cached_train_distilbert-base-uncased_384  run_squad.py\n",
+            "runs\t\t\t\t\t  run_squad_trainer.py\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "mHTc1M1gX7hG"
+      },
+      "source": [
+        "# Fine-Tune albert model on Squad"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "jCLfXOmU8MKg"
+      },
+      "source": [
+        "!python run_squad.py \\\n",
+        "  --model_type albert \\\n",
+        "  --model_name_or_path albert-xlarge-v2 \\\n",
+        "  --tokenizer_name albert-xlarge-v2 \\\n",
+        "  --do_train \\\n",
+        "  --do_eval \\\n",
+        "  --do_lower_case \\\n",
+        "  --train_file ../../../../train-v2.0.json \\\n",
+        "  --predict_file ../../../../dev-v2.0.json \\\n",
+        "  --per_gpu_train_batch_size 12 \\\n",
+        "  --learning_rate 3e-5 \\\n",
+        "  --num_train_epochs 2.0 \\\n",
+        "  --max_seq_length 384 \\\n",
+        "  --doc_stride 128 \\\n",
+        "  --save_steps 300 \\\n",
+        "  --cache_dir ../../../../cache_dir/ \\\n",
+        "  --output_dir ../../../../checkpoint/"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "jLIGrk_lZLb_"
+      },
+      "source": [
+        "# Use pretrained ones"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "XDq6jE8DYEL-"
+      },
+      "source": [
+        "# List of good models to finetune or download as pretrained"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "JIm-3MNeEIeK"
+      },
+      "source": [
+        "# List of Greate models in QA task\n",
+        "\n",
+        "# ALBERT \n",
+        "# BIOBERT\n",
+        "# microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext (Pytorch)\n",
+        "# microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract (Pytorch)\n",
+        "# dmis-lab/biobert-v1.1 (Pytorch)\n",
+        "# dmis-lab/biobert-base-cased-v1.1 (Pytorch)\n",
+        "# dmis-lab/biobert-large-cased-v1.1-squad (Pytorch)\n",
+        "# ktrapeznikov/biobert_v1.1_pubmed_squad_v2 (Pytorch)\n",
+        "# franklu/pubmed_bert_squadv2 (Pytorch)\n",
+        "# ktrapeznikov/albert-xlarge-v2-squad-v2 (Pytorch)\n",
+        "# ktrapeznikov/scibert_scivocab_uncased_squad_v2 (Pytorch)"
+      ],
+      "execution_count": 16,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Hq9QFfrDXOhd"
+      },
+      "source": [
+        "import torch\n",
+        "import tensorflow as tf\n",
+        "from transformers import AutoTokenizer, AutoModelForQuestionAnswering, TFAutoModelForQuestionAnswering"
+      ],
+      "execution_count": 39,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "88yY4WZ3XBD1"
+      },
+      "source": [
+        "# Albert model"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "G0kDmymZ1RK6",
+        "outputId": "8f1fa1c9-a25b-4952-fb3e-95ff9e83f998"
+      },
+      "source": [
+        "albert_tokenizer = AutoTokenizer.from_pretrained(\"ahotrod/albert_xxlargev1_squad2_512\")\n",
+        "albert_model = TFAutoModelForQuestionAnswering.from_pretrained(\"ahotrod/albert_xxlargev1_squad2_512\")"
+      ],
+      "execution_count": 93,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "All model checkpoint layers were used when initializing TFAlbertForQuestionAnswering.\n",
+            "\n",
+            "All the layers of TFAlbertForQuestionAnswering were initialized from the model checkpoint at ahotrod/albert_xxlargev1_squad2_512.\n",
+            "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFAlbertForQuestionAnswering for predictions without further training.\n"
+          ],
+          "name": "stderr"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "NqZ2xm2zWSCR"
+      },
+      "source": [
+        "context = \"\"\"Use acetaminophen exactly as directed on the label, or as prescribed by your doctor. Do not use in larger or smaller amounts or for longer than recommended.\n",
+        "Do not take more of this medication than is recommended. An overdose of acetaminophen can damage your liver or cause death.\n",
+        "Adults and teenagers who weigh at least 110 pounds (50 kilograms): Do not take more than 1000 milligrams (mg) at one time. Do not take more than 4000 mg in 24 hours.\n",
+        "Children younger than 12 years old: Do not take more than 5 doses of acetaminophen in 24 hours. Use only the number of milligrams per dose that is recommended for the child's weight and age. Use exactly as directed on the label.\n",
+        "Avoid also using other medicines that contain acetaminophen, or you could have a fatal overdose.\n",
+        "If you are treating a child, use a pediatric form of acetaminophen. Use only the special dose-measuring dropper or oral syringe that comes with the specific pediatric form you are using. Carefully follow the dosing directions on the medicine label.\n",
+        "Measure liquid medicinewith the dosing syringe provided, or with a special dose-measuring spoon or medicine cup. If you do not have a dose-measuring device, ask your pharmacist for one.\n",
+        "Acetaminophen made for infants is available in two different dose concentrations, and each concentration comes with its own medicine dropper or oral syringe. These dosing devices are not equal between the different concentrations. Using the wrong device may cause you to give your child an overdose of acetaminophen. Never mix and match dosing devices between infant formulations of acetaminophen.\n",
+        "You may need to shake the liquid before each use. Follow the directions on the medicine label.\"\"\"\n",
+        "\n",
+        "question = \"Can you give me dosage information of Acetaminophen for children?\""
+      ],
+      "execution_count": 94,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "AntyS5Q2W7TI",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 36
+        },
+        "outputId": "d17906f6-769b-4026-bf0f-3a40b2e83b3b"
+      },
+      "source": [
+        "inputs = albert_tokenizer.encode_plus(\n",
+        "    question, \n",
+        "    context, \n",
+        "    add_special_tokens=True,\n",
+        "    truncation= True, \n",
+        "    max_length = 512,\n",
+        "    return_tensors=\"tf\")\n",
+        "\n",
+        "input_ids = inputs[\"input_ids\"].numpy()[0]\n",
+        "\n",
+        "answer_start_scores, answer_end_scores = albert_model(\n",
+        "    inputs, return_dict=False)\n",
+        "\n",
+        "answer_start = tf.argmax(answer_start_scores, axis=1).numpy()[0]\n",
+        "answer_end = (tf.argmax(answer_end_scores, axis=1) + 1).numpy()[0]\n",
+        "\n",
+        "answer = albert_tokenizer.convert_tokens_to_string(\n",
+        "    albert_tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))\n",
+        "\n",
+        "answer"
+      ],
+      "execution_count": 96,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            },
+            "text/plain": [
+              "'do not take more than 5 doses of acetaminophen in 24 hours'"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 96
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "GuBT5MhtYk3A"
+      },
+      "source": [
+        "# Albert large context - Truncated"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "0Z_Ar4POXcMx"
+      },
+      "source": [
+        "question = \"Can you give me dosage information of Acetaminophen for adults?\""
+      ],
+      "execution_count": 97,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "VCq3j9UjXS-a"
+      },
+      "source": [
+        "context = \"\"\"Use acetaminophen exactly as directed on the label, or as prescribed by your doctor. Do not use in larger or smaller amounts or for longer than recommended.\n",
+        "Do not take more of this medication than is recommended. An overdose of acetaminophen can damage your liver or cause death.\n",
+        "Adults and teenagers who weigh at least 110 pounds (50 kilograms): Do not take more than 1000 milligrams (mg) at one time. Do not take more than 4000 mg in 24 hours.\n",
+        "Children younger than 12 years old: Do not take more than 5 doses of acetaminophen in 24 hours. Use only the number of milligrams per dose that is recommended for the child's weight and age. Use exactly as directed on the label.\n",
+        "Avoid also using other medicines that contain acetaminophen, or you could have a fatal overdose.\n",
+        "If you are treating a child, use a pediatric form of acetaminophen. Use only the special dose-measuring dropper or oral syringe that comes with the specific pediatric form you are using. Carefully follow the dosing directions on the medicine label.\n",
+        "Measure liquid medicinewith the dosing syringe provided, or with a special dose-measuring spoon or medicine cup. If you do not have a dose-measuring device, ask your pharmacist for one.\n",
+        "Acetaminophen made for infants is available in two different dose concentrations, and each concentration comes with its own medicine dropper or oral syringe. These dosing devices are not equal between the different concentrations. Using the wrong device may cause you to give your child an overdose of acetaminophen. Never mix and match dosing devices between infant formulations of acetaminophen.\n",
+        "You may need to shake the liquid before each use. Follow the directions on the medicine label.\n",
+        "The chewable tablet must be chewed thoroughly before you swallow it.\n",
+        "Make sure your hands are dry when handling the acetaminophen disintegrating tablet. Place the tablet on your tongue. It will begin to dissolve right away. Do not swallow the tablet whole. Allow it to dissolve in your mouth without chewing.\n",
+        "To use the acetaminophen effervescent granules, dissolve one packet of the granules in at least 4 ounces of water. Stir this mixture and drink all of it right away. To make sure you get the entire dose, add a little more water to the same glass, swirl gently and drink right away.\n",
+        "The oral powder should be placed directly on the tongue and swallowed.\n",
+        "Stop taking acetaminophen and call your doctor if:\n",
+        "you still have a sore throat after 2 days of use;\n",
+        "you still have a fever after 3 days of use;\n",
+        "you still have pain after 7 days of use (or 5 days if treating a child);\n",
+        "you have a skin rash, ongoing headache, nausea, vomiting, or any redness or swelling; or\n",
+        "if your symptoms get worse, or if you have any new symptoms.\n",
+        "This medication can cause unusual results with certain lab tests for glucose (sugar) in the urine. Tell any doctor who treats you that you are using acetaminophen.\n",
+        "Store at room temperature away from heat and moisture.\n",
+        "Detailed Acetaminophen dosage information\n",
+        "What happens if I miss a dose?\n",
+        "Since acetaminophen is taken as needed, you may not be on a dosing schedule. If you are taking the medication regularly, take the missed dose as soon as you remember. Skip the missed dose if it is almost time for your next scheduled dose. Do not take extra medicine to make up the missed dose.\n",
+        "What happens if I overdose?\n",
+        "Seek emergency medical attention or call the Poison Help line at 1-800-222-1222. An overdose of acetaminophen can be fatal.\n",
+        "The first signs of an acetaminophen overdose include loss of appetite, nausea, vomiting, stomach pain, sweating, and confusion or weakness. Later symptoms may include pain in your upper stomach, dark urine, and yellowing of your skin or the whites of your eyes.\"\"\""
+      ],
+      "execution_count": 98,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 36
+        },
+        "id": "0MsyJew2XKJJ",
+        "outputId": "6a2f0982-4615-4419-924d-fd35bfe22947"
+      },
+      "source": [
+        "inputs = albert_tokenizer.encode_plus(\n",
+        "    question, \n",
+        "    context, \n",
+        "    add_special_tokens=True,\n",
+        "    truncation= True, \n",
+        "    max_length = 512,\n",
+        "    return_tensors=\"tf\")\n",
+        "\n",
+        "input_ids = inputs[\"input_ids\"].numpy()[0]\n",
+        "\n",
+        "answer_start_scores, answer_end_scores = albert_model(\n",
+        "    inputs, return_dict=False)\n",
+        "\n",
+        "answer_start = tf.argmax(answer_start_scores, axis=1).numpy()[0]\n",
+        "answer_end = (tf.argmax(answer_end_scores, axis=1) + 1).numpy()[0]\n",
+        "\n",
+        "answer = albert_tokenizer.convert_tokens_to_string(\n",
+        "    albert_tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))\n",
+        "\n",
+        "answer"
+      ],
+      "execution_count": 99,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            },
+            "text/plain": [
+              "'do not take more than 1000 milligrams (mg) at one time. do not take more than 4000 mg in 24 hours'"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 99
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Sdca_omKYxn1"
+      },
+      "source": [
+        "# Bert"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "i0gxFUcZYzJI",
+        "outputId": "1250c457-7559-4a30-9d24-2a2a6bc813ba"
+      },
+      "source": [
+        "bert_tokenizer = AutoTokenizer.from_pretrained(\"bert-large-uncased-whole-word-masking-finetuned-squad\")\n",
+        "bert_model = TFAutoModelForQuestionAnswering.from_pretrained(\"bert-large-uncased-whole-word-masking-finetuned-squad\")"
+      ],
+      "execution_count": 100,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "All model checkpoint layers were used when initializing TFBertForQuestionAnswering.\n",
+            "\n",
+            "All the layers of TFBertForQuestionAnswering were initialized from the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad.\n",
+            "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForQuestionAnswering for predictions without further training.\n"
+          ],
+          "name": "stderr"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 36
+        },
+        "id": "fEHRhsssZCa2",
+        "outputId": "cb548854-57d5-4ee6-f333-6983a9755716"
+      },
+      "source": [
+        "inputs = bert_tokenizer.encode_plus(\n",
+        "                      question, context, \n",
+        "                      add_special_tokens=True,     \n",
+        "                      truncation= True, \n",
+        "                      max_length = 512,\n",
+        "                      return_tensors=\"tf\")\n",
+        "\n",
+        "input_ids = inputs[\"input_ids\"].numpy()[0]\n",
+        "\n",
+        "answer_start_scores, answer_end_scores = bert_model(\n",
+        "    inputs, return_dict=False)\n",
+        "\n",
+        "answer_start = tf.argmax(answer_start_scores, axis=1).numpy()[0]\n",
+        "answer_end = (tf.argmax(answer_end_scores, axis=1) + 1).numpy()[0]\n",
+        "\n",
+        "answer = bert_tokenizer.convert_tokens_to_string(\n",
+        "    bert_tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))\n",
+        "\n",
+        "answer"
+      ],
+      "execution_count": 101,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            },
+            "text/plain": [
+              "'[SEP]'"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 101
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Gd0aW88YZaag"
+      },
+      "source": [
+        "# PubMedBert"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ejZqx4rKZcuL"
+      },
+      "source": [
+        "pubmed_tokenizer = AutoTokenizer.from_pretrained(\"franklu/pubmed_bert_squadv2\")\n",
+        "pubmed_model = AutoModelForQuestionAnswering.from_pretrained(\"franklu/pubmed_bert_squadv2\")"
+      ],
+      "execution_count": 90,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 54
+        },
+        "id": "cXvI6zQAZiER",
+        "outputId": "3c972a7c-4cf5-417b-bcd3-aa4d0053edd1"
+      },
+      "source": [
+        "inputs = pubmed_tokenizer.encode_plus(question, \n",
+        "                              context,                       \n",
+        "                              truncation= True, \n",
+        "                              max_length = 512,\n",
+        "                              add_special_tokens=True, \n",
+        "                              return_tensors=\"pt\")\n",
+        "\n",
+        "input_ids = inputs[\"input_ids\"].tolist()[0]\n",
+        "\n",
+        "text_tokens = pubmed_tokenizer.convert_ids_to_tokens(input_ids)\n",
+        "answer_start_scores, answer_end_scores = pubmed_model(**inputs).values()\n",
+        "\n",
+        "answer_start = torch.argmax(\n",
+        "    answer_start_scores\n",
+        ")  \n",
+        "answer_end = torch.argmax(answer_end_scores) + 1 \n",
+        "\n",
+        "answer = pubmed_tokenizer.convert_tokens_to_string(pubmed_tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))\n",
+        "\n",
+        "\n",
+        "answer"
+      ],
+      "execution_count": 91,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            },
+            "text/plain": [
+              "\"do not take more than 1000 milligrams ( mg ) at one time. don't take more than 4000 mg in 24 hours. children younger than 12 years old : don't take more than 5 doses of acetaminophen in 24 hours. use only the number of milligrams per dose that is recommended for the child's weight and age. use exactly as directed on the label\""
+            ]
+          },
+          "metadata": {},
+          "execution_count": 91
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "4RP2zjAYgzqt"
+      },
+      "source": [
+        "# Biobert"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "B_cD7o0Lg1vE"
+      },
+      "source": [
+        "biobert_tokenizer = AutoTokenizer.from_pretrained(\"ktrapeznikov/biobert_v1.1_pubmed_squad_v2\")\n",
+        "biobert_model = AutoModelForQuestionAnswering.from_pretrained(\"ktrapeznikov/biobert_v1.1_pubmed_squad_v2\")"
+      ],
+      "execution_count": 102,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 36
+        },
+        "id": "MoQnmiBZhAgh",
+        "outputId": "22ad59fb-6a6a-4aaa-e113-00d3aa281159"
+      },
+      "source": [
+        "inputs = biobert_tokenizer.encode_plus(question, \n",
+        "                              context,                       \n",
+        "                              truncation= True, \n",
+        "                              max_length = 512,\n",
+        "                              add_special_tokens=True, \n",
+        "                              return_tensors=\"pt\")\n",
+        "\n",
+        "input_ids = inputs[\"input_ids\"].tolist()[0]\n",
+        "\n",
+        "text_tokens = biobert_tokenizer.convert_ids_to_tokens(input_ids)\n",
+        "answer_start_scores, answer_end_scores = biobert_model(**inputs).values()\n",
+        "\n",
+        "answer_start = torch.argmax(\n",
+        "    answer_start_scores\n",
+        ")  \n",
+        "answer_end = torch.argmax(answer_end_scores) + 1 \n",
+        "\n",
+        "answer = biobert_tokenizer.convert_tokens_to_string(biobert_tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))\n",
+        "\n",
+        "\n",
+        "answer"
+      ],
+      "execution_count": 103,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            },
+            "text/plain": [
+              "'Do not take more than 1000 milligrams ( mg ) at one time'"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 103
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "sOLUmivEhOTW"
+      },
+      "source": [
+        "# Scibert"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "aYjd3trjhMp8"
+      },
+      "source": [
+        "scibert_tokenizer = AutoTokenizer.from_pretrained(\"ktrapeznikov/scibert_scivocab_uncased_squad_v2\")\n",
+        "scibert_model = AutoModelForQuestionAnswering.from_pretrained(\"ktrapeznikov/scibert_scivocab_uncased_squad_v2\")"
+      ],
+      "execution_count": 106,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 36
+        },
+        "id": "Sv6V2R0fhcGs",
+        "outputId": "5254db90-81e1-45b6-ba3d-b0a858a8db36"
+      },
+      "source": [
+        "inputs = scibert_tokenizer.encode_plus(question, \n",
+        "                              context,                       \n",
+        "                              truncation= True, \n",
+        "                              max_length = 512,\n",
+        "                              add_special_tokens=True, \n",
+        "                              return_tensors=\"pt\")\n",
+        "\n",
+        "input_ids = inputs[\"input_ids\"].tolist()[0]\n",
+        "\n",
+        "text_tokens = scibert_tokenizer.convert_ids_to_tokens(input_ids)\n",
+        "answer_start_scores, answer_end_scores = scibert_model(**inputs).values()\n",
+        "\n",
+        "answer_start = torch.argmax(\n",
+        "    answer_start_scores\n",
+        ")  \n",
+        "answer_end = torch.argmax(answer_end_scores) + 1 \n",
+        "\n",
+        "answer = scibert_tokenizer.convert_tokens_to_string(scibert_tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))\n",
+        "\n",
+        "\n",
+        "answer"
+      ],
+      "execution_count": 107,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            },
+            "text/plain": [
+              "\"adults and teenagers who weigh at least 110 pounds ( 50 kilograms ) : don't take more than 1000 milligrams ( mg ) at one time\""
+            ]
+          },
+          "metadata": {},
+          "execution_count": 107
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "agV2k_ijXNAX"
+      },
+      "source": [
+        "# Longformer"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "rnG25fQjac9Z",
+        "outputId": "e1b541c6-4421-4a51-f193-f9c52f1161a2"
+      },
+      "source": [
+        "long_tokenizer = AutoTokenizer.from_pretrained(\"mrm8488/longformer-base-4096-finetuned-squadv2\")\n",
+        "long_model = TFAutoModelForQuestionAnswering.from_pretrained(\"mrm8488/longformer-base-4096-finetuned-squadv2\")"
+      ],
+      "execution_count": 28,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "All model checkpoint layers were used when initializing TFLongformerForQuestionAnswering.\n",
+            "\n",
+            "All the layers of TFLongformerForQuestionAnswering were initialized from the model checkpoint at mrm8488/longformer-base-4096-finetuned-squadv2.\n",
+            "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFLongformerForQuestionAnswering for predictions without further training.\n"
+          ],
+          "name": "stderr"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "kaEFBFoHbTC8"
+      },
+      "source": [
+        "inputs = long_tokenizer.encode_plus(\n",
+        "    question, context, add_special_tokens=True, return_tensors=\"tf\")\n",
+        "\n",
+        "input_ids = inputs[\"input_ids\"].numpy()[0]\n",
+        "\n",
+        "answer_start_scores, answer_end_scores = long_model(\n",
+        "    inputs, return_dict=False)\n",
+        "\n",
+        "answer_start = tf.argmax(answer_start_scores, axis=1).numpy()[0]\n",
+        "answer_end = (tf.argmax(answer_end_scores, axis=1) + 1).numpy()[0]\n",
+        "\n",
+        "answer = long_tokenizer.convert_tokens_to_string(\n",
+        "    long_tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))"
+      ],
+      "execution_count": 29,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 36
+        },
+        "id": "LG7tiN2XcL7p",
+        "outputId": "4e8cd666-02d9-4512-e4e1-2b5d167ce824"
+      },
+      "source": [
+        "answer"
+      ],
+      "execution_count": 30,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            },
+            "text/plain": [
+              "'<s>'"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 30
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "DCYuwY8FcOeY"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file