821 lines (821 with data), 33.5 kB
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "-a306_WSUXk0"
},
"source": [
"# Evaluation of Answer Generation performance with BERTscore\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "HV93aEaWniBx",
"outputId": "b5f4d3d3-6ed1-4bcf-8f6f-fba5c3b8743a"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m105.0/105.0 MB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
" Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
" Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m270.9/270.9 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Building wheel for peft (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
" Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
" Building wheel for accelerate (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m14.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m17.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.1/61.1 kB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.5/85.5 MB\u001b[0m \u001b[31m19.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m798.0/798.0 kB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m38.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m216.6/216.6 kB\u001b[0m \u001b[31m25.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m48.3/48.3 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.0/86.0 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m35.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Building wheel for sentence_transformers (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
]
}
],
"source": [
"!pip install -q -U bitsandbytes\n",
"!pip install -q -U git+https://github.com/huggingface/transformers.git\n",
"!pip install -q -U git+https://github.com/huggingface/peft.git\n",
"!pip install -q -U git+https://github.com/huggingface/accelerate.git\n",
"!pip install -q datasets\n",
"!pip install -q bert_score\n",
"!pip install -q faiss-gpu\n",
"!pip install -q langchain\n",
"!pip install -q sentence_transformers"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "-xkfr66OepkC"
},
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"from datasets import Dataset"
]
},
{
"cell_type": "code",
"source": [
"import json\n",
"from datasets import load_metric\n",
"\n",
"# Load csv dataset\n",
"def load_dataset_from_csv(file_path):\n",
" return pd.read_csv(file_path)\n",
"\n",
"# Calculate BERTScore\n",
"def bertscore_(results_file_path, output_file_path, csv_file_path, bertscore):\n",
" with open(results_file_path, 'r') as file:\n",
" predictions = json.load(file)\n",
"\n",
" test_dataset = load_dataset_from_csv(csv_file_path)\n",
" ref_ans = test_dataset['Answer'].tolist()\n",
"\n",
" results = bertscore.compute(predictions=predictions, references=ref_ans, lang=\"en\", model_type='microsoft/deberta-xlarge-mnli')\n",
"\n",
" with open(output_file_path, 'w') as file:\n",
" json.dump(results, file)\n",
"\n",
" print(f\"Results saved to {output_file_path}\")\n",
"\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"id": "iZFLSZ8dOLGk",
"outputId": "9ff14328-2b50-4fa6-a932-a3332114484d"
},
"execution_count": 23,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <style>\n",
" pre {\n",
" white-space: pre-wrap;\n",
" }\n",
" </style>\n",
" "
]
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"test_dataset = \"/content/Test_dataset.csv\"\n",
"bertscore = load_metric(\"bertscore\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 106
},
"id": "5Otdn4clQWAX",
"outputId": "84f63d58-df35-4862-a3fc-9117e103fa08"
},
"execution_count": 21,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <style>\n",
" pre {\n",
" white-space: pre-wrap;\n",
" }\n",
" </style>\n",
" "
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.10/dist-packages/datasets/load.py:752: FutureWarning: The repository for bertscore contains custom code which must be executed to correctly load the metric. You can inspect the repository content at https://raw.githubusercontent.com/huggingface/datasets/2.16.1/metrics/bertscore/bertscore.py\n",
"You can avoid this message in future by passing the argument `trust_remote_code=True`.\n",
"Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.\n",
" warnings.warn(\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"results_file_path = '/content/Answer_HyDE_with_reranker.json'\n",
"output_file_path = '/content/Bertscore_Answer_HyDE_with_reranker.json'\n",
"\n",
"bertscore_(results_file_path, output_file_path, test_dataset, bertscore)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "G3q5c6JG16_l",
"outputId": "e152298f-dffd-477c-e666-30d93ebfc705"
},
"execution_count": 25,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <style>\n",
" pre {\n",
" white-space: pre-wrap;\n",
" }\n",
" </style>\n",
" "
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Results saved to /content/Bertscore_Answer_HyDE_with_reranker.json\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"results_file_path = '/content/Answer_QA_RAG.json'\n",
"output_file_path = '/content/Bertscore_Answer_QA_RAG.json'\n",
"\n",
"bertscore_(results_file_path, output_file_path, test_dataset, bertscore)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "43oCrBvP2b3D",
"outputId": "8d07e7ae-eb16-4ffa-ed6c-729ae11c8293"
},
"execution_count": 26,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <style>\n",
" pre {\n",
" white-space: pre-wrap;\n",
" }\n",
" </style>\n",
" "
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Results saved to /content/Bertscore_Answer_QA_RAG.json\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"results_file_path = '/content/Answer_Only_question.json'\n",
"output_file_path = '/content/Bertscore_Answer_Only_question.json'\n",
"\n",
"bertscore_(results_file_path, output_file_path, test_dataset, bertscore)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "J5zyNLJoyLT3",
"outputId": "d2598938-13d9-4250-b242-554ee79b702a"
},
"execution_count": 27,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <style>\n",
" pre {\n",
" white-space: pre-wrap;\n",
" }\n",
" </style>\n",
" "
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Results saved to /content/Bertscore_Answer_Only_question.json\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"results_file_path = '/content/Answer_Multiquery_questions.json'\n",
"output_file_path = '/content/Bertscore_Answer_Multiquery_questions.json'\n",
"\n",
"bertscore_(results_file_path, output_file_path, test_dataset, bertscore)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "6TDxxjfDyLQ1",
"outputId": "632a98a8-b94c-45a3-808d-5505e7f94c5f"
},
"execution_count": 28,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <style>\n",
" pre {\n",
" white-space: pre-wrap;\n",
" }\n",
" </style>\n",
" "
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Results saved to /content/Bertscore_Answer_Multiquery_questions.json\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"results_file_path = '/content/Answer_Only_hypothetical_answer.json'\n",
"output_file_path = '/content/Bertscore_Answer_Only_hypothetical_answer.json'\n",
"\n",
"bertscore_(results_file_path, output_file_path, test_dataset, bertscore)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "_YglqHYAyLN-",
"outputId": "979eaa6a-e0bf-4e8f-b719-cc1878865ad9"
},
"execution_count": 29,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <style>\n",
" pre {\n",
" white-space: pre-wrap;\n",
" }\n",
" </style>\n",
" "
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Results saved to /content/Bertscore_Answer_Only_hypothetical_answer.json\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Dx_S_L3U-8Ni"
},
"source": [
"#### Analyze"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"id": "uTpr6qOCvNsy",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "a8e99a98-1879-483e-c5be-6f39aa5cf5e1"
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <style>\n",
" pre {\n",
" white-space: pre-wrap;\n",
" }\n",
" </style>\n",
" "
]
},
"metadata": {}
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"# Function to load JSON data\n",
"def load_json(filename):\n",
" with open(filename, 'r') as file:\n",
" return json.load(file)\n",
"\n",
"# Load BERTScore results\n",
"Bertscore_Answer_HyDE_with_reranker = load_json('/content/Bertscore_Answer_HyDE_with_reranker.json')\n",
"Bertscore_Answer_QA_RAG = load_json('/content/Bertscore_Answer_QA_RAG.json')\n",
"Bertscore_Answer_Only_question = load_json('/content/Bertscore_Answer_Only_question.json')\n",
"Bertscore_Answer_Multiquery_questions = load_json('/content/Bertscore_Answer_Multiquery_questions.json')\n",
"Bertscore_Answer_Only_hypothetical_answer = load_json('/content/Bertscore_Answer_Only_hypothetical_answer.json')"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "KFM7TNLR-5pg",
"outputId": "8f77f9a5-ae56-456d-8128-98fba87ff2f6"
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <style>\n",
" pre {\n",
" white-space: pre-wrap;\n",
" }\n",
" </style>\n",
" "
]
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
" precision recall f1\n",
"QA_RAG 0.550603 0.645380 0.591428\n",
"Multiquery_questions 0.532219 0.629463 0.572945\n",
"HyDE_with_reranker 0.539636 0.641390 0.582352\n",
"Only_question 0.540208 0.635625 0.580522\n",
"Only_hypothetical_answer 0.538593 0.642369 0.582680"
],
"text/html": [
"\n",
" <div id=\"df-1fafef96-2bc1-4048-ad16-76b2b9e23bf7\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>f1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>QA_RAG</th>\n",
" <td>0.550603</td>\n",
" <td>0.645380</td>\n",
" <td>0.591428</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Multiquery_questions</th>\n",
" <td>0.532219</td>\n",
" <td>0.629463</td>\n",
" <td>0.572945</td>\n",
" </tr>\n",
" <tr>\n",
" <th>HyDE_with_reranker</th>\n",
" <td>0.539636</td>\n",
" <td>0.641390</td>\n",
" <td>0.582352</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Only_question</th>\n",
" <td>0.540208</td>\n",
" <td>0.635625</td>\n",
" <td>0.580522</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Only_hypothetical_answer</th>\n",
" <td>0.538593</td>\n",
" <td>0.642369</td>\n",
" <td>0.582680</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-1fafef96-2bc1-4048-ad16-76b2b9e23bf7')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-1fafef96-2bc1-4048-ad16-76b2b9e23bf7 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-1fafef96-2bc1-4048-ad16-76b2b9e23bf7');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-32e405b9-713e-40e2-8a2d-0317aa3ee4bc\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-32e405b9-713e-40e2-8a2d-0317aa3ee4bc')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-32e405b9-713e-40e2-8a2d-0317aa3ee4bc button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
]
},
"metadata": {},
"execution_count": 31
}
],
"source": [
"def calculate_statistics(data):\n",
" statistics = {}\n",
" for key in ['precision', 'recall', 'f1']:\n",
" statistics[key] = np.mean(data[key])\n",
" return statistics\n",
"\n",
"Bertscore_Answer_HyDE_with_reranker_stats = calculate_statistics(Bertscore_Answer_HyDE_with_reranker)\n",
"Bertscore_Answer_QA_RAG_stats = calculate_statistics(Bertscore_Answer_QA_RAG)\n",
"Bertscore_Answer_Only_question_stats = calculate_statistics(Bertscore_Answer_Only_question)\n",
"Bertscore_Answer_Multiquery_questions_stats = calculate_statistics(Bertscore_Answer_Multiquery_questions)\n",
"Bertscore_Answer_Only_hypothetical_answer_stats = calculate_statistics(Bertscore_Answer_Only_hypothetical_answer)\n",
"\n",
"stats_df = pd.DataFrame({'QA_RAG':Bertscore_Answer_QA_RAG_stats, 'Multiquery_questions':Bertscore_Answer_Multiquery_questions_stats, 'HyDE_with_reranker':Bertscore_Answer_HyDE_with_reranker_stats, 'Only_question':Bertscore_Answer_Only_question_stats, 'Only_hypothetical_answer':Bertscore_Answer_Only_hypothetical_answer_stats})\n",
"\n",
"stats_df.transpose()"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"machine_shape": "hm",
"provenance": [],
"gpuType": "A100"
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}