{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "OmG4urkedeiv",
"outputId": "564d7767-0fd9-4fe4-fa52-e627c853acf7"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Mounted at /content/drive\n"
]
}
],
"source": [
"# Mount Google Drive at /content/drive when running in Colab.\n",
"# On any other kernel google.colab cannot be imported, so the mount is skipped\n",
"# instead of aborting a Restart & Run All with an ImportError.\n",
"try:\n",
"    from google.colab import drive\n",
"except ImportError:\n",
"    print('google.colab not available; skipping Google Drive mount')\n",
"else:\n",
"    # Only reached when the import succeeded; mount errors still propagate.\n",
"    drive.mount('/content/drive')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "pG8-9pLtdeiv",
"outputId": "e1e4d250-1316-434b-b534-ef986c0cf754"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m15.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m510.5/510.5 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m10.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
" Building wheel for accelerate (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
" Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
" Building wheel for peft (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m119.8/119.8 MB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting seqeval\n",
" Downloading seqeval-1.2.2.tar.gz (43 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.6/43.6 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Requirement already satisfied: numpy>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.25.2)\n",
"Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.2.2)\n",
"Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.11.4)\n",
"Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.4.0)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (3.4.0)\n",
"Building wheels for collected packages: seqeval\n",
" Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16161 sha256=fa0cb1d1af1f5ce87aea7c7a9c03b2c6d8e5ddc09f3f0154bca3aacfddb8730b\n",
" Stored in directory: /root/.cache/pip/wheels/1a/67/4a/ad4082dd7dfc30f2abfe4d80a2ed5926a506eb8a972b4767fa\n",
"Successfully built seqeval\n",
"Installing collected packages: seqeval\n",
"Successfully installed seqeval-1.2.2\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.1/84.1 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h"
]
}
],
"source": [
"# Install the notebook's dependencies (required on a fresh Colab runtime).\n",
"# %pip is used instead of !pip so the packages are installed into the\n",
"# environment of the running kernel rather than whichever pip is first on PATH.\n",
"%pip install -q -U git+https://github.com/huggingface/transformers.git\n",
"%pip install -q -U datasets\n",
"%pip install -q -U git+https://github.com/huggingface/accelerate.git\n",
"%pip install -q -U git+https://github.com/huggingface/peft.git\n",
"%pip install -q -U bitsandbytes\n",
"%pip install -q seqeval\n",
"%pip install -q -U evaluate"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"id": "cD_ebmTNdeiv"
},
"outputs": [],
"source": [
"import numpy as np\n",
"from transformers import AutoTokenizer, AutoModelForTokenClassification, DataCollatorForTokenClassification, Trainer, TrainingArguments, AutoModelForCausalLM, BitsAndBytesConfig, TextGenerationPipeline\n",
"from datasets import load_dataset, load_metric\n",
"from seqeval.metrics import classification_report\n",
"import pandas as pd\n",
"from datasets import Dataset, DatasetDict, load_dataset\n",
"from seqeval.scheme import IOB2\n",
"import evaluate\n",
"import torch\n",
"from eval_file import *\n",
"from utils import *"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 159,
"referenced_widgets": [
"1f89a6e850d044d5bf0ff42826c9d380",
"4dd06c9a1c2844b098827fa425670805",
"dea4621e41dc4aed896525f0f5d2082f",
"6f3fc00b6eda41778fe7bd287d910258",
"c4909f7aefc8468d87c523bb3e3d8a9c",
"0f26ea65ecc2435ca79e5850a3039231",
"ef02f67c5c6449c1944f1905f36c3eac",
"3f81145a71a44933bd3f941796f56cee",
"a11d59e40d7c42deafe5acd6b4cc9870",
"1b998bb43270416e957b5201b5d89d6c",
"949fd3e1474a48db9f22d6c9b5009a5e",
"92c204333bf14600b38b7674400bf461",
"1340bc1629134745bca3f7a710089038",
"e3103093559b42129f9ccd2a7b629c29",
"7bbce2b680134eeeae6cc36624fb5944",
"3811c4d478584c578d7f04b4de5364b2",
"92eb2778add54b5b8470ea6a4e7cb69b",
"7d72b73db60b4c0c97db1fe318d7b80f",
"d60258b3a0c4458294a38b28751e01ca",
"cea64f9845bc45b78195b1aded785402",
"13d76452759a47c2a159506000b29f99",
"34e8cb0a1ce04f689c763e213b40a5a9",
"6c006df746754052a1d614e093420286",
"048c49c93dcf4f578960c02eef89564a",
"aa89b45433db4a95a24f2992ec216703",
"6fd4ab24b0424c46aea4ac2d0ddd4fd4",
"39da10287e3f4f26ab304e3b24b1ab14",
"22aa08e25b22442f8c08040836e5bfac",
"e33030643b8a447084c935a82958d763",
"a0e9fed85eab46d2bdbdadd7bd87357e",
"e567df5cf7714fdabf680d7051c8720a",
"c4addaa947f54b0cbf23d50611970a9f"
]
},
"id": "RTiSFJvzdeiw",
"outputId": "e379f9c4-8ba5-4671-b444-249fa27120b0"
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"VBox(children=(HTML(value='