--- a +++ b/development/qa-server/compare_models.ipynb @@ -0,0 +1,1621 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "compare_models.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "CcJoBmV-Jh84", + "outputId": "952dcdd3-4d26-44ef-bcdc-23092a8bb973" + }, + "source": [ + "!pip install transformers[sentencepiece] spacy rouge" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: transformers[sentencepiece] in /usr/local/lib/python3.7/dist-packages (4.10.2)\n", + "Requirement already satisfied: spacy in /usr/local/lib/python3.7/dist-packages (2.2.4)\n", + "Requirement already satisfied: rouge in /usr/local/lib/python3.7/dist-packages (1.0.1)\n", + "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (1.0.5)\n", + "Requirement already satisfied: blis<0.5.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (0.4.1)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (4.62.0)\n", + "Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (0.8.2)\n", + "Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy) (1.0.5)\n", + "Requirement already satisfied: plac<1.2.0,>=0.9.6 in /usr/local/lib/python3.7/dist-packages (from spacy) (1.1.3)\n", + "Requirement already satisfied: thinc==7.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (7.4.0)\n", + "Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (1.19.5)\n", + "Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /usr/local/lib/python3.7/dist-packages (from spacy) (1.0.0)\n", + "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (2.23.0)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from spacy) (57.4.0)\n", + "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy) (3.0.5)\n", + "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy) (2.0.5)\n", + "Requirement already satisfied: importlib-metadata>=0.20 in /usr/local/lib/python3.7/dist-packages (from catalogue<1.1.0,>=0.0.7->spacy) (4.6.4)\n", + "Requirement already satisfied: typing-extensions>=3.6.4 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20->catalogue<1.1.0,>=0.0.7->spacy) (3.7.4.3)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20->catalogue<1.1.0,>=0.0.7->spacy) (3.5.0)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (1.24.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (2021.5.30)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (3.0.4)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (2.10)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from rouge) (1.15.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (5.4.1)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (21.0)\n", + "Requirement already satisfied: huggingface-hub>=0.0.12 in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (0.0.16)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (2019.12.20)\n", + "Requirement already satisfied: sacremoses in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (0.0.45)\n", + "Requirement already satisfied: tokenizers<0.11,>=0.10.1 in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (0.10.3)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (3.0.12)\n", + "Requirement already satisfied: protobuf in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (3.17.3)\n", + "Requirement already satisfied: sentencepiece==0.1.91 in /usr/local/lib/python3.7/dist-packages (from transformers[sentencepiece]) (0.1.91)\n", + "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->transformers[sentencepiece]) (2.4.7)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers[sentencepiece]) (1.0.1)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers[sentencepiece]) (7.1.2)\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "FA_vVNqQJonA", + "outputId": "25fe6864-6422-4ba7-dc09-633d8b57011f" + }, + "source": [ + "!python -m spacy download en_core_web_lg" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting en_core_web_lg==2.2.5\n", + " Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-2.2.5/en_core_web_lg-2.2.5.tar.gz (827.9 MB)\n", + "\u001b[K |████████████████████████████████| 827.9 MB 1.6 MB/s \n", + "\u001b[?25hRequirement already satisfied: spacy>=2.2.2 in /usr/local/lib/python3.7/dist-packages (from en_core_web_lg==2.2.5) (2.2.4)\n", + "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (2.0.5)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (57.4.0)\n", + "Requirement already satisfied: plac<1.2.0,>=0.9.6 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (1.1.3)\n", + "Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (1.0.5)\n", + "Requirement already satisfied: thinc==7.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (7.4.0)\n", + "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (3.0.5)\n", + "Requirement already satisfied: blis<0.5.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (0.4.1)\n", + "Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (1.0.0)\n", + "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (2.23.0)\n", + "Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (0.8.2)\n", + "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (1.0.5)\n", + "Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (1.19.5)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (4.62.0)\n", + "Requirement already satisfied: importlib-metadata>=0.20 in /usr/local/lib/python3.7/dist-packages (from catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->en_core_web_lg==2.2.5) (4.6.4)\n", + "Requirement already satisfied: typing-extensions>=3.6.4 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20->catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->en_core_web_lg==2.2.5) (3.7.4.3)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20->catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->en_core_web_lg==2.2.5) (3.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->en_core_web_lg==2.2.5) (2021.5.30)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->en_core_web_lg==2.2.5) (3.0.4)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->en_core_web_lg==2.2.5) (2.10)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->en_core_web_lg==2.2.5) (1.24.3)\n", + "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n", + "You can now load the model via spacy.load('en_core_web_lg')\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "7DDkPtP1JqWr" + }, + "source": [ + "import torch\n", + "import re\n", + "import spacy\n", + "import nltk\n", + "import json\n", + "import en_core_web_lg\n", + "import pandas as pd\n", + "from tqdm import tqdm\n", + "from rouge import Rouge \n", + "from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction\n", + "from transformers import AutoModelForMaskedLM, AutoModel, AutoTokenizer, pipeline" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uHlKEnVyJvgd", + "outputId": "792bdca5-b082-44f9-e26e-a70f8ddf95eb" + }, + "source": [ + "nlp = en_core_web_lg.load()\n", + "nltk.download('punkt')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[nltk_data] Downloading package punkt to /root/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 97 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "giOXiAl3Jvz7", + "outputId": "eb2fa4d7-bd35-48d9-98c6-f38c5e337e3a" + }, + "source": [ + "%cd drive/MyDrive/" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[Errno 2] No such file or directory: 'drive/MyDrive/'\n", + "/content/drive/My Drive\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "urhqR1nTKw3U" + }, + "source": [ + "f = open ('qa.json', \"r\")\n", + "qa = json.loads(f.read())" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Ssq5_8g5UB4P", + "outputId": "d67e5b4b-efae-4ac6-ba9f-637cb8013fb2" + }, + "source": [ + "qa['data'][0]" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'context': \"Use acetaminophen exactly as directed on the label, or as prescribed by your doctor. Do not use in larger or smaller amounts or for longer than recommended.\\nDo not take more of this medication than is recommended. An overdose of acetaminophen can damage your liver or cause death.\\nAdults and teenagers who weigh at least 110 pounds (50 kilograms): Do not take more than 1000 milligrams (mg) at one time. Do not take more than 4000 mg in 24 hours.\\nChildren younger than 12 years old: Do not take more than 5 doses of acetaminophen in 24 hours. Use only the number of milligrams per dose that is recommended for the child's weight and age. Use exactly as directed on the label.\\nAvoid also using other medicines that contain acetaminophen, or you could have a fatal overdose.\\nIf you are treating a child, use a pediatric form of acetaminophen. Use only the special dose-measuring dropper or oral syringe that comes with the specific pediatric form you are using. Carefully follow the dosing directions on the medicine label.\\nMeasure liquid medicinewith the dosing syringe provided, or with a special dose-measuring spoon or medicine cup. If you do not have a dose-measuring device, ask your pharmacist for one.\\nAcetaminophen made for infants is available in two different dose concentrations, and each concentration comes with its own medicine dropper or oral syringe. These dosing devices are not equal between the different concentrations. Using the wrong device may cause you to give your child an overdose of acetaminophen. Never mix and match dosing devices between infant formulations of acetaminophen.\\nYou may need to shake the liquid before each use. Follow the directions on the medicine label.\\nThe chewable tablet must be chewed thoroughly before you swallow it.\\nMake sure your hands are dry when handling the acetaminophen disintegrating tablet. Place the tablet on your tongue. It will begin to dissolve right away. Do not swallow the tablet whole. Allow it to dissolve in your mouth without chewing.\\nTo use the acetaminophen effervescent granules, dissolve one packet of the granules in at least 4 ounces of water. Stir this mixture and drink all of it right away. To make sure you get the entire dose, add a little more water to the same glass, swirl gently and drink right away.\\nThe oral powder should be placed directly on the tongue and swallowed.\\nStop taking acetaminophen and call your doctor if:\\nyou still have a sore throat after 2 days of use;\\nyou still have a fever after 3 days of use;\\nyou still have pain after 7 days of use (or 5 days if treating a child);\\nyou have a skin rash, ongoing headache, nausea, vomiting, or any redness or swelling; or\\nif your symptoms get worse, or if you have any new symptoms.\\nThis medication can cause unusual results with certain lab tests for glucose (sugar) in the urine. Tell any doctor who treats you that you are using acetaminophen.\\nStore at room temperature away from heat and moisture.\\nDetailed Acetaminophen dosage information\\nWhat happens if I miss a dose?\\nSince acetaminophen is taken as needed, you may not be on a dosing schedule. If you are taking the medication regularly, take the missed dose as soon as you remember. Skip the missed dose if it is almost time for your next scheduled dose. Do not take extra medicine to make up the missed dose.\\nWhat happens if I overdose?\\nSeek emergency medical attention or call the Poison Help line at 1-800-222-1222. An overdose of acetaminophen can be fatal.\\nThe first signs of an acetaminophen overdose include loss of appetite, nausea, vomiting, stomach pain, sweating, and confusion or weakness. Later symptoms may include pain in your upper stomach, dark urine, and yellowing of your skin or the whites of your eyes.\",\n", + " 'qas': [{'answers': ['Do not take more than 5 doses of acetaminophen in 24 hours.',\n", + " \"Use only the number of milligrams per dose that is recommended for the child's weight and age.\",\n", + " 'Use exactly as directed on the label',\n", + " 'If you are treating a child, use a pediatric form of acetaminophen.',\n", + " 'Use only the special dose-measuring dropper or oral syringe that comes with the specific pediatric form you are using.',\n", + " 'Carefully follow the dosing directions on the medicine label.'],\n", + " 'question': 'Can you give dosage information acetaminophen for children?'},\n", + " {'answers': ['Do not take more than 1000 milligrams (mg) at one time.',\n", + " 'Do not take more than 4000 mg in 24 hours.'],\n", + " 'question': 'Can you give dosage information acetaminophen for adults?'},\n", + " {'answers': ['Since acetaminophen is taken as needed, you may not be on a dosing schedule.',\n", + " 'If you are taking the medication regularly, take the missed dose as soon as you remember.',\n", + " 'Skip the missed dose if it is almost time for your next scheduled dose.',\n", + " 'Do not take extra medicine to make up the missed dose.'],\n", + " 'question': 'What happens if I miss a dose?'},\n", + " {'answers': ['The first signs of an acetaminophen overdose include loss of appetite, nausea, vomiting, stomach pain, sweating, and confusion or weakness.',\n", + " 'Later symptoms may include pain in your upper stomach, dark urine, and yellowing of your skin or the whites of your eyes.'],\n", + " 'question': 'What is the symptoms of acetaminophen overdose?'}]}" + ] + }, + "metadata": {}, + "execution_count": 139 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "biW6mnlTLMdc" + }, + "source": [ + "n_top = 3\n", + "rouge = Rouge()\n", + "smoothie = SmoothingFunction().method4\n", + "bleu_scores = []\n", + "rouge1_scores = []\n", + "rouge2_scores = []\n", + "rougel_scores = []\n", + "\n", + "\n", + "model_names = {'bert-large-uncased-whole-word-masking-finetuned-squad':'question_answering',\n", + " 'ktrapeznikov/albert-xlarge-v2-squad-v2':'question_answering',\n", + " 'allenai/longformer-large-4096-finetuned-triviaqa':'question_answering',\n", + " 'deepset/roberta-large-squad2':'question_answering',\n", + " 'mrm8488/longformer-base-4096-finetuned-squadv2':'question_answering',\n", + " 'mrm8488/squeezebert-finetuned-squadv2':'question_answering',\n", + " 'bigwiz83/sapbert-from-pubmedbert-squad2':'question_answering',\n", + " 'vicgalle/xlm-roberta-large-xnli-anli':'similarity',\n", + " 'joeddav/xlm-roberta-large-xnli':'similarity',\n", + " 'valhalla/distilbart-mnli-12-3':'similarity',\n", + " 'BaptisteDoyen/camembert-base-xnli':'similarity',\n", + " 'typeform/mobilebert-uncased-mnli':'similarity',\n", + " 'valhalla/distilbart-mnli-12-1':'similarity',\n", + " 'valhalla/distilbart-mnli-12-9':'similarity',\n", + " 'cross-encoder/nli-distilroberta-base':'similarity',\n", + " 'bert-large-uncased':'similarity',\n", + " 'bert-large-cased':'similarity',\n", + " 'bert-base-uncased':'similarity',\n", + " 'bert-base-cased':'similarity',\n", + " 'bert-large-finetuned':'similarity',\n", + " 'bert-base-finetuned':'similarity'}" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "8wi60ZiZLRT0" + }, + "source": [ + "class SentenceSimilarity():\n", + " def __init__(self, model_name=None, device='cuda'):\n", + " self.tokenizer = AutoTokenizer.from_pretrained(model_name)\n", + " self.model = AutoModel.from_pretrained(model_name).eval()\n", + "\n", + " def __call__(self, text):\n", + " tokens = self.tokenizer(text, padding=True, return_tensors='pt', truncation=True)\n", + " with torch.no_grad():\n", + " embeddings = self.model(**tokens).last_hidden_state\n", + "\n", + " mask = tokens['attention_mask'].unsqueeze(-1).expand(embeddings.shape).float()\n", + " sentence_embeddings = torch.sum(embeddings * mask, dim=1) / torch.clamp(mask.sum(1), min=1e-9) \n", + " return sentence_embeddings\n", + "\n", + " def compute_label_embedding(self, labels):\n", + " self.label_embeds = self(labels)\n", + " \n", + " def similarity(self, example):\n", + " return torch.cosine_similarity(self(example), self.label_embeds).tolist()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "VUPsEEr9LUFb" + }, + "source": [ + "def get_answer(sentences, question, model, model_type, n_top=None):\n", + " if model_type == 'similarity':\n", + " scores = model.similarity(question)\n", + " dic = {}\n", + "\n", + " for i, ex in enumerate(sentences):\n", + " dic[ex] = scores[i]\n", + "\n", + " s = pd.Series(dic, name='Similarity')\n", + " s.index.name = 'Sentences'\n", + " df = s.reset_index()\n", + " df.sort_values(by='Similarity', ascending=False, inplace=True)\n", + " answer = \" \".join(df.iloc[:n_top]['Sentences'])\n", + "\n", + " elif model_type == 'question_answering':\n", + " context = \" \".join(sentences)\n", + " answer = model(question=question, context=context)['answer']\n", + "\n", + " return answer" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "NW6RmKZGLcCf" + }, + "source": [ + "def get_score(token_reference, token_candidate, sf):\n", + " bleu_score = sentence_bleu(token_reference, token_candidate, smoothing_function=sf, weights=(1, 0, 0, 0))\n", + " rouge_score = rouge.get_scores(candidate, reference)\n", + " return bleu_score, rouge_score[0]['rouge-1']['f'], rouge_score[0]['rouge-2']['f'], rouge_score[0]['rouge-l']['f']" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "Q07h24hZLesx" + }, + "source": [ + "def load_model(model_name, model_type):\n", + " if model_type == 'similarity':\n", + " model = SentenceSimilarity(model_name)\n", + "\n", + " elif model_type == 'question_answering':\n", + " model = pipeline(model=model_name, tokenizer=model_name, task=\"question-answering\")\n", + " \n", + " return model" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 708 + }, + "id": "TYHoCqWyL3YO", + "outputId": "57eea346-f28f-4dfd-ce5a-8d7f1cd5d76d" + }, + "source": [ + "df = pd.DataFrame(columns=['Type', 'BLEU', 'ROUGE_1', 'ROUGE_2', 'ROUGE_l'], index=list(model_names.keys()))\n", + "df" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Type</th>\n", + " <th>BLEU</th>\n", + " <th>ROUGE_1</th>\n", + " <th>ROUGE_2</th>\n", + " <th>ROUGE_l</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>bert-large-uncased-whole-word-masking-finetuned-squad</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>ktrapeznikov/albert-xlarge-v2-squad-v2</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>allenai/longformer-large-4096-finetuned-triviaqa</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>deepset/roberta-large-squad2</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>mrm8488/longformer-base-4096-finetuned-squadv2</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>mrm8488/squeezebert-finetuned-squadv2</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>bigwiz83/sapbert-from-pubmedbert-squad2</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>vicgalle/xlm-roberta-large-xnli-anli</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>joeddav/xlm-roberta-large-xnli</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>valhalla/distilbart-mnli-12-3</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>BaptisteDoyen/camembert-base-xnli</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>typeform/mobilebert-uncased-mnli</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>valhalla/distilbart-mnli-12-1</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>valhalla/distilbart-mnli-12-9</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>cross-encoder/nli-distilroberta-base</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>bert-large-uncased</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>bert-large-cased</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>bert-base-uncased</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>bert-base-cased</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>bert-large-finetuned</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>bert-base-finetuned</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Type BLEU ... ROUGE_2 ROUGE_l\n", + "bert-large-uncased-whole-word-masking-finetuned... NaN NaN ... NaN NaN\n", + "ktrapeznikov/albert-xlarge-v2-squad-v2 NaN NaN ... NaN NaN\n", + "allenai/longformer-large-4096-finetuned-triviaqa NaN NaN ... NaN NaN\n", + "deepset/roberta-large-squad2 NaN NaN ... NaN NaN\n", + "mrm8488/longformer-base-4096-finetuned-squadv2 NaN NaN ... NaN NaN\n", + "mrm8488/squeezebert-finetuned-squadv2 NaN NaN ... NaN NaN\n", + "bigwiz83/sapbert-from-pubmedbert-squad2 NaN NaN ... NaN NaN\n", + "vicgalle/xlm-roberta-large-xnli-anli NaN NaN ... NaN NaN\n", + "joeddav/xlm-roberta-large-xnli NaN NaN ... NaN NaN\n", + "valhalla/distilbart-mnli-12-3 NaN NaN ... NaN NaN\n", + "BaptisteDoyen/camembert-base-xnli NaN NaN ... NaN NaN\n", + "typeform/mobilebert-uncased-mnli NaN NaN ... NaN NaN\n", + "valhalla/distilbart-mnli-12-1 NaN NaN ... NaN NaN\n", + "valhalla/distilbart-mnli-12-9 NaN NaN ... NaN NaN\n", + "cross-encoder/nli-distilroberta-base NaN NaN ... NaN NaN\n", + "bert-large-uncased NaN NaN ... NaN NaN\n", + "bert-large-cased NaN NaN ... NaN NaN\n", + "bert-base-uncased NaN NaN ... NaN NaN\n", + "bert-base-cased NaN NaN ... NaN NaN\n", + "bert-large-finetuned NaN NaN ... NaN NaN\n", + "bert-base-finetuned NaN NaN ... NaN NaN\n", + "\n", + "[21 rows x 5 columns]" + ] + }, + "metadata": {}, + "execution_count": 105 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4mHGKWNaLhJn", + "outputId": "250361e5-eddb-4cfc-859d-bb8ac06319c9" + }, + "source": [ + "for model_name in tqdm(model_names):\n", + " model_type = model_names[model_name]\n", + " df['Type'][model_name] = model_type\n", + "\n", + " model = load_model(model_name, model_type)\n", + "\n", + " for data in qa['data']:\n", + " context = data['context']\n", + " doc = nlp(context)\n", + " sentences = [str(sentence) for sentence in list(doc.sents)]\n", + "\n", + " if model_type == 'similarity':\n", + " model.compute_label_embedding(sentences)\n", + " \n", + " for q_a in data['qas']:\n", + " question = q_a['question']\n", + " reference = \"\".join(q_a['answers'])\n", + "\n", + " candidate = get_answer(sentences, question, model, model_type, n_top)\n", + " token_reference = nltk.word_tokenize(reference)\n", + " token_candidate = nltk.word_tokenize(candidate)\n", + "\n", + " bleu_score, rouge1_score, rouge2_score, rougel_score = get_score(\n", + " token_reference, token_candidate, smoothie)\n", + "\n", + " bleu_scores.append(bleu_score)\n", + " rouge1_scores.append(rouge1_score)\n", + " rouge2_scores.append(rouge2_score)\n", + " rougel_scores.append(rougel_score)\n", + "\n", + " df['BLEU'][model_name] = sum(bleu_scores)/len(bleu_scores)\n", + " df['ROUGE_1'][model_name] = sum(rouge1_scores)/len(rouge1_scores)\n", + " df['ROUGE_2'][model_name] = sum(rouge2_scores)/len(rouge2_scores)\n", + " df['ROUGE_l'][model_name] = sum(rougel_scores)/len(rougel_scores)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 33%|███▎ | 7/21 [50:52<1:02:07, 266.25s/it]Some weights of the model checkpoint at vicgalle/xlm-roberta-large-xnli-anli were not used when initializing XLMRobertaModel: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']\n", + "- This IS expected if you are initializing XLMRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing XLMRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of XLMRobertaModel were not initialized from the model checkpoint at vicgalle/xlm-roberta-large-xnli-anli and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + " 38%|███▊ | 8/21 [54:26<54:05, 249.67s/it] Some weights of the model checkpoint at joeddav/xlm-roberta-large-xnli were not used when initializing XLMRobertaModel: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']\n", + "- This IS expected if you are initializing XLMRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing XLMRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + " 43%|████▎ | 9/21 [57:59<47:38, 238.19s/it]Some weights of the model checkpoint at valhalla/distilbart-mnli-12-3 were not used when initializing BartModel: ['classification_head.out_proj.weight', 'classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias']\n", + "- This IS expected if you are initializing BartModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BartModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + " 48%|████▊ | 10/21 [1:00:11<37:37, 205.20s/it]Some weights of the model checkpoint at BaptisteDoyen/camembert-base-xnli were not used when initializing CamembertModel: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']\n", + "- This IS expected if you are initializing CamembertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing CamembertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of CamembertModel were not initialized from the model checkpoint at BaptisteDoyen/camembert-base-xnli and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + " 52%|█████▏ | 11/21 [1:01:28<27:39, 165.99s/it]Some weights of the model checkpoint at typeform/mobilebert-uncased-mnli were not used when initializing MobileBertModel: ['classifier.bias', 'classifier.weight']\n", + "- This IS expected if you are initializing MobileBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing MobileBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n", + " 57%|█████▋ | 12/21 [1:01:57<18:38, 124.33s/it]Some weights of the model checkpoint at valhalla/distilbart-mnli-12-1 were not used when initializing BartModel: ['classification_head.out_proj.weight', 'classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias']\n", + "- This IS expected if you are initializing BartModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BartModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + " 62%|██████▏ | 13/21 [1:03:42<15:49, 118.73s/it]Some weights of the model checkpoint at valhalla/distilbart-mnli-12-9 were not used when initializing BartModel: ['classification_head.out_proj.weight', 'classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias']\n", + "- This IS expected if you are initializing BartModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BartModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + " 67%|██████▋ | 14/21 [1:06:40<15:55, 136.56s/it]Some weights of the model checkpoint at cross-encoder/nli-distilroberta-base were not used when initializing RobertaModel: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']\n", + "- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of RobertaModel were not initialized from the model checkpoint at cross-encoder/nli-distilroberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + " 71%|███████▏ | 15/21 [1:07:18<10:41, 106.91s/it]Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']\n", + "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + " 76%|███████▌ | 16/21 [1:10:23<10:50, 130.16s/it]Some weights of the model checkpoint at bert-large-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']\n", + "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + " 81%|████████ | 17/21 [1:13:30<09:49, 147.36s/it]Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']\n", + "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + " 86%|████████▌ | 18/21 [1:14:30<06:03, 121.20s/it]Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']\n", + "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + " 90%|█████████ | 19/21 [1:15:32<03:26, 103.31s/it]Some weights of the model checkpoint at bert-large-finetuned were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']\n", + "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of BertModel were not initialized from the model checkpoint at bert-large-finetuned and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + " 95%|█████████▌| 20/21 [1:18:23<02:03, 123.62s/it]Some weights of the model checkpoint at bert-base-finetuned were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']\n", + "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of BertModel were not initialized from the model checkpoint at bert-base-finetuned and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "100%|██████████| 21/21 [1:19:14<00:00, 226.43s/it]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WX5ytzRqD10j" + }, + "source": [ + "```\n", + " Model name: bert-large-uncased-whole-word-masking-finetuned-squad \n", + " Bleu Scores: 0.04904401154401154 \n", + " Rouge_1 Scores average 0.31644641517394717 \n", + " Rouge_2 Scores average 0.2724766888228745 \n", + " Rouge_l Scores average 0.31644641517394717 \n", + "```\n", + "--- \n", + "```\n", + " Model name: ktrapeznikov/albert-xlarge-v2-squad-v2 \n", + " Bleu Scores: 0.06971486420716247 \n", + " Rouge_1 Scores average 0.29757417507940026 \n", + " Rouge_2 Scores average 0.2536624654219619 \n", + " Rouge_l Scores average 0.29757417507940026 \n", + "```\n", + "--- \n", + "```\n", + " Model name: allenai/longformer-large-4096-finetuned-triviaqa \n", + " Bleu Scores: 0.046476576138108315 \n", + " Rouge_1 Scores average 0.22187419078216236 \n", + " Rouge_2 Scores average 0.17127281242571119 \n", + " Rouge_l Scores average 0.22187419078216236 \n", + "```\n", + "---\n", + "``` \n", + " Model name: deepset/roberta-large-squad2 \n", + " Bleu Scores: 0.04129682604297518 \n", + " Rouge_1 Scores average 0.21955236236558998 \n", + " Rouge_2 Scores average 0.16794483315016406 \n", + " Rouge_l Scores average 0.21887393631402965 \n", + "```\n", + "---\n", + "``` \n", + " Model name: mrm8488/longformer-base-4096-finetuned-squadv2 \n", + " Bleu Scores: 0.04648984178676111 \n", + " Rouge_1 Scores average 0.2385953434228197 \n", + " Rouge_2 Scores average 0.18657712347078392 \n", + " Rouge_l Scores average 0.23769609634271227 \n", + "```\n", + "---\n", + "```\n", + " Model name: mrm8488/squeezebert-finetuned-squadv2 \n", + " Bleu Scores: 0.04722471830548441 \n", + " Rouge_1 Scores average 0.22400095940279424 \n", + " Rouge_2 Scores average 0.17202397066190891 \n", + " Rouge_l Scores average 0.22301484441179562 \n", + "```\n", + "---\n", + "``` \n", + " Model name: bigwiz83/sapbert-from-pubmedbert-squad2 \n", + " Bleu Scores: 0.049341688839488355 \n", + " Rouge_1 Scores average 0.24944919188753056 \n", + " Rouge_2 Scores average 0.19754100964430785 \n", + " Rouge_l Scores average 0.2486039504666746 \n", + "```\n", + "---\n", + "``` \n", + " Model name: vicgalle/xlm-roberta-large-xnli-anli \n", + " Bleu Scores: 0.04740734658870999 \n", + " Rouge_1 Scores average 0.22964171066573294 \n", + " Rouge_2 Scores average 0.17672108237430337 \n", + " Rouge_l Scores average 0.22753524253174262 \n", + "```\n", + "---\n", + "``` \n", + " Model name: joeddav/xlm-roberta-large-xnli \n", + " Bleu Scores: 0.047315274217411704 \n", + " Rouge_1 Scores average 0.21945454186019484 \n", + " Rouge_2 Scores average 0.1635738573008003 \n", + " Rouge_l Scores average 0.21642795239288626 \n", + "```\n", + "---\n", + "``` \n", + " Model name: valhalla/distilbart-mnli-12-3 \n", + " Bleu Scores: 0.04763133893108889 \n", + " Rouge_1 Scores average 0.2087123380971496 \n", + " Rouge_2 Scores average 0.1516332558133015 \n", + " Rouge_l Scores average 0.20482137932196318 \n", + "```\n", + "---\n", + "``` \n", + " Model name: BaptisteDoyen/camembert-base-xnli \n", + " Bleu Scores: 0.04592487652938288 \n", + " Rouge_1 Scores average 0.19921692307268624 \n", + " Rouge_2 Scores average 0.14318905630596554 \n", + " Rouge_l Scores average 0.19567968782251677 \n", + "```\n", + "---\n", + "``` \n", + " Model name: typeform/mobilebert-uncased-mnli \n", + " Bleu Scores: 0.046274723756052905 \n", + " Rouge_1 Scores average 0.1911579999340068 \n", + " Rouge_2 Scores average 0.13433764523023312 \n", + " Rouge_l Scores average 0.18707070849850982 \n", + "```\n", + "---\n", + "``` \n", + " Model name: valhalla/distilbart-mnli-12-1 \n", + " Bleu Scores: 0.046644305973648485 \n", + " Rouge_1 Scores average 0.18605349749283961 \n", + " Rouge_2 Scores average 0.12885250404352783 \n", + " Rouge_l Scores average 0.1814022220313078 \n", + "```\n", + "---\n", + "```\n", + " Model name: valhalla/distilbart-mnli-12-9 \n", + " Bleu Scores: 0.04613591478837148 \n", + " Rouge_1 Scores average 0.17881783397728088 \n", + " Rouge_2 Scores average 0.12270547507575051 \n", + " Rouge_l Scores average 0.17377353349872895 \n", + "```\n", + "---\n", + "``` \n", + " Model name: cross-encoder/nli-distilroberta-base \n", + " Bleu Scores: 0.04638736081156602 \n", + " Rouge_1 Scores average 0.17832898668655725 \n", + " Rouge_2 Scores average 0.12137505515814476 \n", + " Rouge_l Scores average 0.1729153552918669 \n", + "```\n", + "---\n", + "``` \n", + " Model name: bert-large-uncased \n", + " Bleu Scores: 0.04670057489523694 \n", + " Rouge_1 Scores average 0.17682194476510882 \n", + " Rouge_2 Scores average 0.11873541679211357 \n", + " Rouge_l Scores average 0.1711206526046795 \n", + "```\n", + "---\n", + "``` \n", + " Model name: bert-large-cased \n", + " Bleu Scores: 0.04653593699430314 \n", + " Rouge_1 Scores average 0.1730175007118266 \n", + " Rouge_2 Scores average 0.11344737213744338 \n", + " Rouge_l Scores average 0.1667599034710913 \n", + "```\n", + "---\n", + "``` \n", + " Model name: bert-base-uncased \n", + " Bleu Scores: 0.0466282799842496 \n", + " Rouge_1 Scores average 0.17135281702814545 \n", + " Rouge_2 Scores average 0.11131447883448312 \n", + " Rouge_l Scores average 0.16487847216815818 \n", + "```\n", + "---\n", + "``` \n", + " Model name: bert-base-cased \n", + " Bleu Scores: 0.046950278910238195 \n", + " Rouge_1 Scores average 0.1697269807317403 \n", + " Rouge_2 Scores average 0.10887071360951835 \n", + " Rouge_l Scores average 0.1631931474452232 \n", + "```\n", + "---\n", + "```\n", + " Model name: model/bert_med/checkpoint/checkpoint-final \n", + " Bleu Scores: 0.04708176338444994 \n", + " Rouge_1 Scores average 0.16765655912804828 \n", + " Rouge_2 Scores average 0.10720455061615725 \n", + " Rouge_l Scores average 0.16110637360774427 \n", + "```\n", + "---\n", + "```\n", + " Model name: checkpoint-10000 \n", + " Bleu Scores: 0.046965961369023486 \n", + " Rouge_1 Scores average 0.16574710188693653 \n", + " Rouge_2 Scores average 0.105084539300102 \n", + " Rouge_l Scores average 0.1592690830965846 \n", + " ```" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 708 + }, + "id": "Pf2xpu1oLxiH", + "outputId": "c68191a7-1282-4c61-9715-574eaa6066ea" + }, + "source": [ + "df" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Type</th>\n", + " <th>BLEU</th>\n", + " <th>ROUGE_1</th>\n", + " <th>ROUGE_2</th>\n", + " <th>ROUGE_l</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>bert-large-uncased-whole-word-masking-finetuned-squad</th>\n", + " <td>question_answering</td>\n", + " <td>0.049044</td>\n", + " <td>0.316446</td>\n", + " <td>0.272477</td>\n", + " <td>0.316446</td>\n", + " </tr>\n", + " <tr>\n", + " <th>ktrapeznikov/albert-xlarge-v2-squad-v2</th>\n", + " <td>question_answering</td>\n", + " <td>0.0697149</td>\n", + " <td>0.297574</td>\n", + " <td>0.253662</td>\n", + " <td>0.297574</td>\n", + " </tr>\n", + " <tr>\n", + " <th>allenai/longformer-large-4096-finetuned-triviaqa</th>\n", + " <td>question_answering</td>\n", + " <td>0.0464766</td>\n", + " <td>0.221874</td>\n", + " <td>0.171273</td>\n", + " <td>0.221874</td>\n", + " </tr>\n", + " <tr>\n", + " <th>deepset/roberta-large-squad2</th>\n", + " <td>question_answering</td>\n", + " <td>0.0412968</td>\n", + " <td>0.219552</td>\n", + " <td>0.167945</td>\n", + " <td>0.218874</td>\n", + " </tr>\n", + " <tr>\n", + " <th>mrm8488/longformer-base-4096-finetuned-squadv2</th>\n", + " <td>question_answering</td>\n", + " <td>0.0464898</td>\n", + " <td>0.238595</td>\n", + " <td>0.186577</td>\n", + " <td>0.237696</td>\n", + " </tr>\n", + " <tr>\n", + " <th>mrm8488/squeezebert-finetuned-squadv2</th>\n", + " <td>question_answering</td>\n", + " <td>0.0472247</td>\n", + " <td>0.224001</td>\n", + " <td>0.172024</td>\n", + " <td>0.223015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>bigwiz83/sapbert-from-pubmedbert-squad2</th>\n", + " <td>question_answering</td>\n", + " <td>0.0493417</td>\n", + " <td>0.249449</td>\n", + " <td>0.197541</td>\n", + " <td>0.248604</td>\n", + " </tr>\n", + " <tr>\n", + " <th>vicgalle/xlm-roberta-large-xnli-anli</th>\n", + " <td>similarity</td>\n", + " <td>0.0474073</td>\n", + " <td>0.229642</td>\n", + " <td>0.176721</td>\n", + " <td>0.227535</td>\n", + " </tr>\n", + " <tr>\n", + " <th>joeddav/xlm-roberta-large-xnli</th>\n", + " <td>similarity</td>\n", + " <td>0.0473153</td>\n", + " <td>0.219455</td>\n", + " <td>0.163574</td>\n", + " <td>0.216428</td>\n", + " </tr>\n", + " <tr>\n", + " <th>valhalla/distilbart-mnli-12-3</th>\n", + " <td>similarity</td>\n", + " <td>0.0476313</td>\n", + " <td>0.208712</td>\n", + " <td>0.151633</td>\n", + " <td>0.204821</td>\n", + " </tr>\n", + " <tr>\n", + " <th>BaptisteDoyen/camembert-base-xnli</th>\n", + " <td>similarity</td>\n", + " <td>0.0459249</td>\n", + " <td>0.199217</td>\n", + " <td>0.143189</td>\n", + " <td>0.19568</td>\n", + " </tr>\n", + " <tr>\n", + " <th>typeform/mobilebert-uncased-mnli</th>\n", + " <td>similarity</td>\n", + " <td>0.0462747</td>\n", + " <td>0.191158</td>\n", + " <td>0.134338</td>\n", + " <td>0.187071</td>\n", + " </tr>\n", + " <tr>\n", + " <th>valhalla/distilbart-mnli-12-1</th>\n", + " <td>similarity</td>\n", + " <td>0.0466443</td>\n", + " <td>0.186053</td>\n", + " <td>0.128853</td>\n", + " <td>0.181402</td>\n", + " </tr>\n", + " <tr>\n", + " <th>valhalla/distilbart-mnli-12-9</th>\n", + " <td>similarity</td>\n", + " <td>0.0461359</td>\n", + " <td>0.178818</td>\n", + " <td>0.122705</td>\n", + " <td>0.173774</td>\n", + " </tr>\n", + " <tr>\n", + " <th>cross-encoder/nli-distilroberta-base</th>\n", + " <td>similarity</td>\n", + " <td>0.0463874</td>\n", + " <td>0.178329</td>\n", + " <td>0.121375</td>\n", + " <td>0.172915</td>\n", + " </tr>\n", + " <tr>\n", + " <th>bert-large-uncased</th>\n", + " <td>similarity</td>\n", + " <td>0.0467006</td>\n", + " <td>0.176822</td>\n", + " <td>0.118735</td>\n", + " <td>0.171121</td>\n", + " </tr>\n", + " <tr>\n", + " <th>bert-large-cased</th>\n", + " <td>similarity</td>\n", + " <td>0.0465359</td>\n", + " <td>0.173018</td>\n", + " <td>0.113447</td>\n", + " <td>0.16676</td>\n", + " </tr>\n", + " <tr>\n", + " <th>bert-base-uncased</th>\n", + " <td>similarity</td>\n", + " <td>0.0466283</td>\n", + " <td>0.171353</td>\n", + " <td>0.111314</td>\n", + " <td>0.164878</td>\n", + " </tr>\n", + " <tr>\n", + " <th>bert-base-cased</th>\n", + " <td>similarity</td>\n", + " <td>0.0469503</td>\n", + " <td>0.169727</td>\n", + " <td>0.108871</td>\n", + " <td>0.163193</td>\n", + " </tr>\n", + " <tr>\n", + " <th>bert-large-finetuned</th>\n", + " <td>similarity</td>\n", + " <td>0.0470818</td>\n", + " <td>0.167657</td>\n", + " <td>0.107205</td>\n", + " <td>0.161106</td>\n", + " </tr>\n", + " <tr>\n", + " <th>bert-base-finetuned</th>\n", + " <td>similarity</td>\n", + " <td>0.046966</td>\n", + " <td>0.165747</td>\n", + " <td>0.105085</td>\n", + " <td>0.159269</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Type ... ROUGE_l\n", + "bert-large-uncased-whole-word-masking-finetuned... question_answering ... 0.316446\n", + "ktrapeznikov/albert-xlarge-v2-squad-v2 question_answering ... 0.297574\n", + "allenai/longformer-large-4096-finetuned-triviaqa question_answering ... 0.221874\n", + "deepset/roberta-large-squad2 question_answering ... 0.218874\n", + "mrm8488/longformer-base-4096-finetuned-squadv2 question_answering ... 0.237696\n", + "mrm8488/squeezebert-finetuned-squadv2 question_answering ... 0.223015\n", + "bigwiz83/sapbert-from-pubmedbert-squad2 question_answering ... 0.248604\n", + "vicgalle/xlm-roberta-large-xnli-anli similarity ... 0.227535\n", + "joeddav/xlm-roberta-large-xnli similarity ... 0.216428\n", + "valhalla/distilbart-mnli-12-3 similarity ... 0.204821\n", + "BaptisteDoyen/camembert-base-xnli similarity ... 0.19568\n", + "typeform/mobilebert-uncased-mnli similarity ... 0.187071\n", + "valhalla/distilbart-mnli-12-1 similarity ... 0.181402\n", + "valhalla/distilbart-mnli-12-9 similarity ... 0.173774\n", + "cross-encoder/nli-distilroberta-base similarity ... 0.172915\n", + "bert-large-uncased similarity ... 0.171121\n", + "bert-large-cased similarity ... 0.16676\n", + "bert-base-uncased similarity ... 0.164878\n", + "bert-base-cased similarity ... 0.163193\n", + "bert-large-finetuned similarity ... 0.161106\n", + "bert-base-finetuned similarity ... 0.159269\n", + "\n", + "[21 rows x 5 columns]" + ] + }, + "metadata": {}, + "execution_count": 107 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "xKU5BhiGQErn" + }, + "source": [ + "df.to_csv('qa_static.csv')" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "HGhoAqCPura2" + }, + "source": [ + "df = pd.read_csv('qa_static.csv')" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lLyCMD3evZxT", + "outputId": "5f461772-8b68-4cb2-f869-94016b329bd6" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 bert-large-uncased-whole-word-masking-finetune...\n", + "1 ktrapeznikov/albert-xlarge-v2-squad-v2\n", + "2 allenai/longformer-large-4096-finetuned-triviaqa\n", + "3 deepset/roberta-large-squad2\n", + "4 mrm8488/longformer-base-4096-finetuned-squadv2\n", + "5 mrm8488/squeezebert-finetuned-squadv2\n", + "6 bigwiz83/sapbert-from-pubmedbert-squad2\n", + "Name: Unnamed: 0, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 128 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 489 + }, + "id": "sM0g85xJNyPa", + "outputId": "2cc70e4b-95e5-4bda-dfd4-61b1ac1ef491" + }, + "source": [ + "df.iloc[7:]" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Unnamed: 0</th>\n", + " <th>Type</th>\n", + " <th>BLEU</th>\n", + " <th>ROUGE_1</th>\n", + " <th>ROUGE_2</th>\n", + " <th>ROUGE_l</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>vicgalle/xlm-roberta-large-xnli-anli</td>\n", + " <td>similarity</td>\n", + " <td>0.047407</td>\n", + " <td>0.229642</td>\n", + " <td>0.176721</td>\n", + " <td>0.227535</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>joeddav/xlm-roberta-large-xnli</td>\n", + " <td>similarity</td>\n", + " <td>0.047315</td>\n", + " <td>0.219455</td>\n", + " <td>0.163574</td>\n", + " <td>0.216428</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>valhalla/distilbart-mnli-12-3</td>\n", + " <td>similarity</td>\n", + " <td>0.047631</td>\n", + " <td>0.208712</td>\n", + " <td>0.151633</td>\n", + " <td>0.204821</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>BaptisteDoyen/camembert-base-xnli</td>\n", + " <td>similarity</td>\n", + " <td>0.045925</td>\n", + " <td>0.199217</td>\n", + " <td>0.143189</td>\n", + " <td>0.195680</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>typeform/mobilebert-uncased-mnli</td>\n", + " <td>similarity</td>\n", + " <td>0.046275</td>\n", + " <td>0.191158</td>\n", + " <td>0.134338</td>\n", + " <td>0.187071</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>valhalla/distilbart-mnli-12-1</td>\n", + " <td>similarity</td>\n", + " <td>0.046644</td>\n", + " <td>0.186053</td>\n", + " <td>0.128853</td>\n", + " <td>0.181402</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>valhalla/distilbart-mnli-12-9</td>\n", + " <td>similarity</td>\n", + " <td>0.046136</td>\n", + " <td>0.178818</td>\n", + " <td>0.122705</td>\n", + " <td>0.173774</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>cross-encoder/nli-distilroberta-base</td>\n", + " <td>similarity</td>\n", + " <td>0.046387</td>\n", + " <td>0.178329</td>\n", + " <td>0.121375</td>\n", + " <td>0.172915</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>bert-large-uncased</td>\n", + " <td>similarity</td>\n", + " <td>0.046701</td>\n", + " <td>0.176822</td>\n", + " <td>0.118735</td>\n", + " <td>0.171121</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>bert-large-cased</td>\n", + " <td>similarity</td>\n", + " <td>0.046536</td>\n", + " <td>0.173018</td>\n", + " <td>0.113447</td>\n", + " <td>0.166760</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>bert-base-uncased</td>\n", + " <td>similarity</td>\n", + " <td>0.046628</td>\n", + " <td>0.171353</td>\n", + " <td>0.111314</td>\n", + " <td>0.164878</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>bert-base-cased</td>\n", + " <td>similarity</td>\n", + " <td>0.046950</td>\n", + " <td>0.169727</td>\n", + " <td>0.108871</td>\n", + " <td>0.163193</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>bert-large-finetuned</td>\n", + " <td>similarity</td>\n", + " <td>0.047082</td>\n", + " <td>0.167657</td>\n", + " <td>0.107205</td>\n", + " <td>0.161106</td>\n", + " </tr>\n", + " <tr>\n", + " <th>20</th>\n", + " <td>bert-base-finetuned</td>\n", + " <td>similarity</td>\n", + " <td>0.046966</td>\n", + " <td>0.165747</td>\n", + " <td>0.105085</td>\n", + " <td>0.159269</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Unnamed: 0 Type ... ROUGE_2 ROUGE_l\n", + "7 vicgalle/xlm-roberta-large-xnli-anli similarity ... 0.176721 0.227535\n", + "8 joeddav/xlm-roberta-large-xnli similarity ... 0.163574 0.216428\n", + "9 valhalla/distilbart-mnli-12-3 similarity ... 0.151633 0.204821\n", + "10 BaptisteDoyen/camembert-base-xnli similarity ... 0.143189 0.195680\n", + "11 typeform/mobilebert-uncased-mnli similarity ... 0.134338 0.187071\n", + "12 valhalla/distilbart-mnli-12-1 similarity ... 0.128853 0.181402\n", + "13 valhalla/distilbart-mnli-12-9 similarity ... 0.122705 0.173774\n", + "14 cross-encoder/nli-distilroberta-base similarity ... 0.121375 0.172915\n", + "15 bert-large-uncased similarity ... 0.118735 0.171121\n", + "16 bert-large-cased similarity ... 0.113447 0.166760\n", + "17 bert-base-uncased similarity ... 0.111314 0.164878\n", + "18 bert-base-cased similarity ... 0.108871 0.163193\n", + "19 bert-large-finetuned similarity ... 0.107205 0.161106\n", + "20 bert-base-finetuned similarity ... 0.105085 0.159269\n", + "\n", + "[14 rows x 6 columns]" + ] + }, + "metadata": {}, + "execution_count": 115 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6P-PK_r8OQQl", + "outputId": "865b5255-c7ba-4571-f7b9-52c74bc24cfd" + }, + "source": [ + "list(df.iloc[7:]['Unnamed: 0'])" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['vicgalle/xlm-roberta-large-xnli-anli',\n", + " 'joeddav/xlm-roberta-large-xnli',\n", + " 'valhalla/distilbart-mnli-12-3',\n", + " 'BaptisteDoyen/camembert-base-xnli',\n", + " 'typeform/mobilebert-uncased-mnli',\n", + " 'valhalla/distilbart-mnli-12-1',\n", + " 'valhalla/distilbart-mnli-12-9',\n", + " 'cross-encoder/nli-distilroberta-base',\n", + " 'bert-large-uncased',\n", + " 'bert-large-cased',\n", + " 'bert-base-uncased',\n", + " 'bert-base-cased',\n", + " 'bert-large-finetuned',\n", + " 'bert-base-finetuned']" + ] + }, + "metadata": {}, + "execution_count": 120 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_vAb9djfOLGi" + }, + "source": [ + "index = list(df.iloc[7:]['Unnamed: 0'])" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 479 + }, + "id": "Ys5wOUp_3p44", + "outputId": "44d1e19b-91ec-41d0-9ae2-4b6179a7bc96" + }, + "source": [ + "df.iloc[:6].plot.bar(rot=0, subplots=True, figsize=(8, 6))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([<matplotlib.axes._subplots.AxesSubplot object at 0x7fbfb85536d0>,\n", + " <matplotlib.axes._subplots.AxesSubplot object at 0x7fbfb8453f50>,\n", + " <matplotlib.axes._subplots.AxesSubplot object at 0x7fbfb8765510>,\n", + " <matplotlib.axes._subplots.AxesSubplot object at 0x7fbfb8381750>],\n", + " dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 136 + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 576x432 with 4 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 588 + }, + "id": "j3I3FobLNgJ3", + "outputId": "20495d22-44dc-4c18-affb-a71da40e4ba6" + }, + "source": [ + "df.iloc[7:].plot.bar(rot=0, subplots=True, figsize=(16, 8))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([<matplotlib.axes._subplots.AxesSubplot object at 0x7fbfba576150>,\n", + " <matplotlib.axes._subplots.AxesSubplot object at 0x7fbfba179b10>,\n", + " <matplotlib.axes._subplots.AxesSubplot object at 0x7fbfc1e30c10>,\n", + " <matplotlib.axes._subplots.AxesSubplot object at 0x7fbfba167210>],\n", + " dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 132 + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 1152x576 with 4 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 588 + }, + "id": "AsCAmTELQuf4", + "outputId": "16196351-a0d4-44c3-8576-90789a1ae6fa" + }, + "source": [ + "df.plot.bar(rot=0, subplots=True, figsize=(20, 8))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([<matplotlib.axes._subplots.AxesSubplot object at 0x7fbfb88a6650>,\n", + " <matplotlib.axes._subplots.AxesSubplot object at 0x7fbfb8855e50>,\n", + " <matplotlib.axes._subplots.AxesSubplot object at 0x7fbfb8886b90>,\n", + " <matplotlib.axes._subplots.AxesSubplot object at 0x7fbfb87c0610>],\n", + " dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 135 + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 1440x576 with 4 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "balb23Y6Sh8v" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file