a b/development/qa-server/FineTune_bert_MLM.ipynb
1
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Fine-tune `bert-base-uncased` with masked language modeling (MLM)\n",
    "\n",
    "Colab notebook that runs `run_mlm.py` on `all_sentences.txt` from the Google Drive folder `MyDrive/QA`. Training output is written to `MyDrive/output_dir/` and the model/dataset cache to `MyDrive/cache_dir/`.\n",
    "\n",
    "Run the cells top to bottom on a fresh runtime. Cell outputs are intentionally not stored in the file."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Mount Google Drive and enter the working directory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "3WYYhQg9yMXZ"
   },
   "outputs": [],
   "source": [
    "from google.colab import drive\n",
    "drive.mount('/content/gdrive')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "AyBzpc7PyNka"
   },
   "outputs": [],
   "source": [
    "%cd /content/gdrive/MyDrive/QA/"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Check the GPU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "C4lqTiw8yNqQ"
   },
   "outputs": [],
   "source": [
    "!nvidia-smi"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Install dependencies\n",
    "\n",
    "`transformers` is installed from the GitHub repository and `datasets` from PyPI, both unpinned. The last recorded run of this notebook resolved to `transformers 4.11.0.dev0` and `datasets 1.11.0`.\n",
    "\n",
    "**TODO:** pin both packages (a release tag for `transformers`) so the run is reproducible."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "5Z2s-02zymBE"
   },
   "outputs": [],
   "source": [
    "%pip install -q git+https://github.com/huggingface/transformers\n",
    "%pip install -q datasets"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get the `transformers` repository\n",
    "\n",
    "The clone is skipped when the `transformers` directory already exists in the working directory, so re-running this cell does not fail with `destination path 'transformers' already exists`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "wjOqZ_xm2gfF"
   },
   "outputs": [],
   "source": [
    "#https://huggingface.co/transformers/examples.html\n",
    "!test -d transformers || git clone https://github.com/huggingface/transformers.git"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train\n",
    "\n",
    "`run_mlm.py` and `all_sentences.txt` must be present in the working directory (`run_mlm.py` is presumably a copy of the example script from the cloned repository - TODO confirm).\n",
    "\n",
    "Settings: 10% of the training file is held out for validation, sequences are capped at 128 tokens, and a checkpoint is saved every 1000 steps with at most 2 kept.\n",
    "\n",
    "NOTE(review): with `--overwrite_output_dir` set, the example script is understood to skip automatic checkpoint detection, so each run starts again from the base model and `--ignore_data_skip` only matters when a run is actually resumed - confirm this is intended."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "CvnECAgl5U4c"
   },
   "outputs": [],
   "source": [
    "!python run_mlm.py \\\n",
    "    --model_name_or_path bert-base-uncased \\\n",
    "    --tokenizer_name bert-base-uncased \\\n",
    "    --train_file all_sentences.txt \\\n",
    "    --validation_split_percentage 10 \\\n",
    "    --max_seq_length 128 \\\n",
    "    --do_train \\\n",
    "    --do_eval \\\n",
    "    --learning_rate 2e-5 \\\n",
    "    --num_train_epochs 2.0 \\\n",
    "    --per_device_train_batch_size 10 \\\n",
    "    --save_steps 1000 \\\n",
    "    --save_total_limit 2 \\\n",
    "    --ignore_data_skip \\\n",
    "    --cache_dir /content/gdrive/MyDrive/cache_dir/ \\\n",
    "    --output_dir /content/gdrive/MyDrive/output_dir/ \\\n",
    "    --overwrite_output_dir yes"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "collapsed_sections": [],
   "name": "FineTune_bert.ipynb",
   "version": ""
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}