{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "GREqJcq4OLO6",
"outputId": "511e84c9-0109-411a-b44e-0801766d0fe9",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
]
}
],
"source": [
"# Mount Google Drive when running in Colab. Outside Colab the google.colab\n",
"# import fails, so the mount is skipped and the notebook can still be run\n",
"# top to bottom on a fresh kernel.\n",
"try:\n",
"    from google.colab import drive\n",
"except ImportError:\n",
"    print('google.colab not available; skipping Google Drive mount.')\n",
"else:\n",
"    drive.mount('/content/drive')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "ICZaolBLOLO8",
"outputId": "c8c38b1c-e0e3-4258-cc8b-d7d684d9d089",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: seqeval in /usr/local/lib/python3.10/dist-packages (1.2.2)\n",
"Requirement already satisfied: numpy>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.25.2)\n",
"Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.2.2)\n",
"Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.11.4)\n",
"Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.4.2)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (3.5.0)\n",
"Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.3.0)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.14.0)\n",
"Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch) (4.11.0)\n",
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.12)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.3)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.4)\n",
"Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2023.6.0)\n",
"Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.105)\n",
"Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.105)\n",
"Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.105)\n",
"Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch) (8.9.2.26)\n",
"Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.3.1)\n",
"Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch) (11.0.2.54)\n",
"Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch) (10.3.2.106)\n",
"Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch) (11.4.5.107)\n",
"Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.0.106)\n",
"Requirement already satisfied: nvidia-nccl-cu12==2.20.5 in /usr/local/lib/python3.10/dist-packages (from torch) (2.20.5)\n",
"Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.105)\n",
"Requirement already satisfied: triton==2.3.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.3.0)\n",
"Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch) (12.4.127)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.5)\n",
"Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n",
" Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
" Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
" Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"
]
}
],
"source": [
"# Colab setup: install the notebook's dependencies.\n",
"# `%pip` is used instead of `!pip` so packages are installed into the\n",
"# environment of the running kernel rather than whichever pip is first on the\n",
"# shell PATH. `-q` keeps the cell output short.\n",
"# Skip this cell if your environment already provides these packages.\n",
"%pip install -q -U datasets\n",
"%pip install -q seqeval\n",
"%pip install -q -U torch\n",
"%pip install -q -U evaluate\n",
"# transformers, peft and accelerate are installed from the default branch of\n",
"# their GitHub repositories; the commented-out lines are the alternative that\n",
"# installs the released packages instead.\n",
"%pip install -q -U git+https://github.com/huggingface/transformers.git\n",
"# %pip install -q -U transformers\n",
"%pip install -q -U bitsandbytes\n",
"%pip install -q -U git+https://github.com/huggingface/peft.git\n",
"%pip install -q -U git+https://github.com/huggingface/accelerate.git\n",
"# %pip install -q -U accelerate"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "Km86o_uHOLO8"
},
"outputs": [],
"source": [
"import torch\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextGenerationPipeline\n",
"import os\n",
"from utils import *"
]
},
{
"cell_type": "code",
"source": [
"import pandas as pd\n",
"from datasets import Dataset, DatasetDict, load_dataset"
],
"metadata": {
"id": "L0zkxHTmOxkJ"
},
"execution_count": 3,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import re"
],
"metadata": {
"id": "uoWkyegpPCOZ"
},
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"id": "L0zE4thvOLO9",
"outputId": "75716581-4307-4d98-fddf-887bc825e808",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 145,
"referenced_widgets": [
"009541513b8e451886782e457d7cd479",
"519375173fe64735a4faccf127abcf90",
"484017270bb646119bc605fccb88fa4e",
"23dc47bf61314784b7ffb42d995bbce9",
"7fdc12ed98be4101a7ff8d15272d5033",
"683aa055aae842adabca4ee3fa9a16bf",
"6ab04465b07747a592cb28f77c6b5e20",
"2379403cd2df4ffd9f7d4eecbf571d7e",
"382eff5973b942678fde4a9c6e9c8150",
"aec5500c07b54365ab252b5746e08127",
"86ed1786dfcc49148bc65e88627a34f3",
"36b52f9261ae4c589efc8faab5806509",
"8c0ca5e87a1243ba94d821d87dfd7cc1",
"1d62a5d167c04e839152933688357dbe",
"20614482956a4d359d7e6470fe3b5236",
"b46a3ac13be24c04a0c47d424e54d256",
"94885d557c64483bbcb842c570e380a5",
"0ca2c9f7ad19437886ef4a9aa74cf952",
"50557c598d9246599d7e894137d2df26",
"ab9a60b256554da0a101dc9d541cbb8c",
"523cf83bffdc4d8bbe06c9d886e5d884",
"0a508a2d17d04b76aa6bad327e25da96",
"7fd41498666942149a04c076f61cb6ad",
"32cfc02171c1420599bbbeca98ebfc3b",
"c72717e7d2714c7aa695cf5b6ba2481c",
"45c84a726d5e470babfb03cd5b139de7",
"ec182a1a030c46ac87b2fa998ea9d868",
"22972fbed4cc4b408be8abe9f9bb2847",
"09fc71a94b2f4766a02b5879ba232e95",
"da47463752ff4288a73b1802d37941aa",
"9e89840302da47e3a383888883bbadea",
"5ecf0ca50841453d85a082217c033fae"
]
}
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"VBox(children=(HTML(value='