{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "OmG4urkedeiv",
"outputId": "87a33dc4-f118-45a7-c8aa-aab80e9c76ca",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
]
}
],
"source": [
"# Mount Google Drive when running on Google Colab; on any other kernel the mount is skipped.\n",
"try:\n",
"    from google.colab import drive\n",
"except ImportError:\n",
"    # The google.colab package is not installed outside a Colab runtime.\n",
"    drive = None\n",
"\n",
"if drive is not None:\n",
"    drive.mount('/content/drive')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "pG8-9pLtdeiv",
"outputId": "d3c710c2-a443-4b3b-db20-2fafe6746f0d",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
" Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m510.5/510.5 kB\u001b[0m \u001b[31m10.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m15.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m17.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
" Building wheel for accelerate (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
"Collecting seqeval\n",
" Downloading seqeval-1.2.2.tar.gz (43 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.6/43.6 kB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Requirement already satisfied: numpy>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.25.2)\n",
"Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.2.2)\n",
"Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.11.4)\n",
"Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.4.0)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (3.4.0)\n",
"Building wheels for collected packages: seqeval\n",
" Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16161 sha256=b571c9c4836027705ada02b905790e6d0b00dff2c033b522f11aeb9c3d0d66ed\n",
" Stored in directory: /root/.cache/pip/wheels/1a/67/4a/ad4082dd7dfc30f2abfe4d80a2ed5926a506eb8a972b4767fa\n",
"Successfully built seqeval\n",
"Installing collected packages: seqeval\n",
"Successfully installed seqeval-1.2.2\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.1/84.1 kB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h"
]
}
],
"source": [
"# Install the notebook's dependencies (required on Colab; skip this cell if they are already installed).\n",
"# %pip is used instead of !pip so packages land in the environment of the running kernel.\n",
"# NOTE(review): transformers and accelerate are installed unpinned from their git default branches,\n",
"# and datasets/evaluate are upgraded to the latest release; pin versions for reproducible runs.\n",
"%pip install -q -U git+https://github.com/huggingface/transformers.git\n",
"%pip install -q -U datasets\n",
"%pip install -q -U git+https://github.com/huggingface/accelerate.git\n",
"%pip install seqeval\n",
"%pip install -q -U evaluate"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "cD_ebmTNdeiv"
},
"outputs": [],
"source": [
"import numpy as np\n",
"from transformers import AutoTokenizer, AutoModelForTokenClassification, DataCollatorForTokenClassification, Trainer, TrainingArguments\n",
"from datasets import load_dataset\n",
"from seqeval.metrics import classification_report\n",
"from seqeval.scheme import IOB2\n",
"import evaluate\n",
"import torch\n",
"\n",
"# `load_metric` was deprecated in `datasets` in favour of the `evaluate` package and is missing from\n",
"# newer `datasets` releases, so importing it unconditionally can fail after `pip install -U datasets`.\n",
"# Keep the name available by falling back to `evaluate.load`, which takes the metric name the same way.\n",
"try:\n",
"    from datasets import load_metric\n",
"except ImportError:\n",
"    load_metric = evaluate.load"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"id": "RTiSFJvzdeiw",
"outputId": "ed96941b-ab02-41bb-da96-c668cb8aac43",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 145,
"referenced_widgets": [
"aae03eda481e488eaf1ef5b0610cdc0b",
"034ac69beaf14927bb2800c7e848ecbb",
"0c7e1ad3261b4ff4876a9a8654fc9464",
"306bc801a94541999e50632fbc3f4c0f",
"36dbf510f6fc446383291658d394066a",
"77affba1207543b483febb7acb086298",
"d2605acf97fa45ad8b157868a831e4f7",
"c116ed771b584a1398d9200fd3a8eba0",
"f3a413d8c726471295e252f3ea122f31",
"2e4c44c84e464804b1c757f88e46d506",
"719dfc3140194a839f84e6efe01e01b0",
"7e0e3e6216db4347a7de8f4553dd6282",
"7c10261a70ee4e7184524cc2590a28c1",
"5b93a9aedb2f49d88e5e1d9cdb13c751",
"5ad14d3ac4614dcc903d60dcb4edbb88",
"c218b57c46fc4f5eb9260fd08c5a94c8",
"e377b67ff2a948e3be138334ed3de886",
"d9a4367195b94edc9d1ba97d7b0a2eb8",
"aa9ffe65a42643c69f553f283cdf6772",
"8a95f6bbd81148519ec4639d8b0f4db6",
"13acbfe9fc7a4ff4a6ec0a8be56c4f62",
"e7bcd126a8384f94a027e130d09f2346",
"ee0ac8c2fac44c02b28844bb5bc6822a",
"d1444855a9cc4726a37cf9f184be8409",
"baa7f4149cb446078d05574550f33cda",
"002b9e99a05c46749a052c5770f280a5",
"33a233aa855f4356960aa336d361bfdb",
"b111ac45f0d648a8b89e1bf3d26dc354",
"7c0010ffc2b84b7cb1c2e943e849a67b",
"4a0ecc50b9e94408b714c578a0786f03",
"f67c37bb203c4775b5997e50af97fae7",
"8fe86ac5511c400d81c52bd335c96859"
]
}
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"VBox(children=(HTML(value='