{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "QO5obdMWWJJw",
"outputId": "c0b8b68d-1af6-4308-8ea1-873bd428ecd5"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Mounted at /content/drive\n"
]
}
],
"source": [
"# uncomment if working in colab\n",
"from google.colab import drive\n",
"drive.mount('/content/drive')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uXHZTGK9WJJx",
"outputId": "c8a0372f-8774-40df-d275-03e90c2c8848"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m510.5/510.5 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting seqeval\n",
" Downloading seqeval-1.2.2.tar.gz (43 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.6/43.6 kB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Requirement already satisfied: numpy>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.25.2)\n",
"Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.2.2)\n",
"Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.11.4)\n",
"Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.4.0)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (3.4.0)\n",
"Building wheels for collected packages: seqeval\n",
" Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16161 sha256=3d05e378a95e6360b53d3fd878ed43d9796294678465f3f139c5e30bef6ab718\n",
" Stored in directory: /root/.cache/pip/wheels/1a/67/4a/ad4082dd7dfc30f2abfe4d80a2ed5926a506eb8a972b4767fa\n",
"Successfully built seqeval\n",
"Installing collected packages: seqeval\n",
"Successfully installed seqeval-1.2.2\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.1/84.1 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
" Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m119.8/119.8 MB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m297.4/297.4 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Building wheel for peft (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
" Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
" Building wheel for accelerate (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"
]
}
],
"source": [
"# uncomment if using colab\n",
"!pip install -q -U datasets\n",
"!pip install seqeval\n",
"!pip install -q -U evaluate\n",
"!pip install -q -U git+https://github.com/huggingface/transformers.git\n",
"!pip install -q -U bitsandbytes\n",
"# !pip install -i https://pypi.org/simple/ bitsandbytes\n",
"!pip install -q -U git+https://github.com/huggingface/peft.git\n",
"!pip install -q -U git+https://github.com/huggingface/accelerate.git"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "iwRXECp_WJJx"
},
"outputs": [],
"source": [
"from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextGenerationPipeline\n",
"import torch\n",
"import accelerate\n",
"import os\n",
"from utils import *"
]
},
{
"cell_type": "code",
"source": [
"import pandas as pd\n",
"from datasets import Dataset, DatasetDict"
],
"metadata": {
"id": "-De8g6lgpWtZ"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from datasets import load_dataset, load_metric"
],
"metadata": {
"id": "e7Bpoz11bRe3"
},
"execution_count": 3,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 159,
"referenced_widgets": [
"0f4e594a6aa64dd697fb841d4207d4b7",
"413c1cd08da84e32ba2de36d891bf86a",
"ba7f75db7de145abb174658862ef50a3",
"d55f2bd66fe044e4816348c1b4b032bf",
"d1fd6600f76a4bf7968b0d13de093148",
"b00aef23e75445c785a6b3b8756b9c94",
"1fa716b65c7f47989184cc24bd56e5bd",
"494b9dacddb948f7b08b509e7a79f3f1",
"e3e2f93363b74e24bf22abcaf021b3dc",
"b607f0954b3844e18ed2a887372d42c5",
"95176ba773274023b4e3356bb3cb4cc9",
"ac4c65187e864fd09e296fcbbe3ad6d8",
"540f2e543376445d849e1a56ac38e4d4",
"a9d9e19fa5ae41109e7c7cf9a4d8a13c",
"4fd000047a0a402194cb3b99bb59d8f9",
"9d08585ce6fd40538dc10d1c5d1aee70",
"f4642fc990e94bf99c7d26017d8771ba",
"9c49b620b2c04b83ae01bf4ab06e7270",
"d47d0c2d528c40a38965fac763e450cf",
"374d4f7c917d48c2b81f24cbfcfce062",
"9f151f163b2c4966b9460dd31e184a84",
"67a9e41b56a741baa4a953905d11c263",
"e32ad767760d4bddb2f3a0a38327339b",
"681dac30b90b4b02b38f9e5bb99429c0",
"dd0df631524e42e48299283f1a9555e9",
"609d5b49e085499994a11342e8dd425f",
"3e7792c5d2484e24950f1f83f899ba4a",
"8fae6bd7ebc54636a14b375fa46ec0dc",
"7fa56a404f8847239109441a3b043a77",
"d34e4794cb944c1cb8ae88845cad54fb",
"4856843799e242ac8b72cf37ad1a159f",
"c3c6d1e8aeb14e999ea3fadc10539861"
]
},
"id": "SMrGGcyPWJJy",
"outputId": "1988c833-06f1-44c2-c832-525b2dabee55"
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"VBox(children=(HTML(value='