finetune.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Setup the environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mounted at /content/drive\n"
     ]
    }
   ],
   "source": [
    "# mount to your Google Drive\n",
    "from google.colab import drive\n",
    "drive.mount('/content/drive')"
   ]
  },
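  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Optional sanity check (a minimal sketch): confirm the mount succeeded before relying on paths under `/content/drive`. The `project_dir` below simply mirrors the `%cd` target in the next cell."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional sanity check: verify the Drive mount and the project folder\n",
    "import os\n",
    "\n",
    "assert os.path.isdir('/content/drive/My Drive'), 'Google Drive is not mounted'\n",
    "# Same path as the %cd in the next cell\n",
    "project_dir = '/content/drive/My Drive/ML-Quiz-XRay-ReportGeneration'\n",
    "print('Project folder found:', os.path.isdir(project_dir))"
   ]
  },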
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# navigate to project folder\n",
    "%cd '/content/drive/My Drive/ML-Quiz-XRay-ReportGeneration'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "import uuid\n",
    "from PIL import Image"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Install Llava and DeepSpeed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "fatal: destination path 'LLaVA' already exists and is not an empty directory.\n",
      "Requirement already satisfied: pip in /usr/local/lib/python3.10/dist-packages (24.1.2)\n",
      "Collecting pip\n",
      "  Using cached pip-24.2-py3-none-any.whl.metadata (3.6 kB)\n",
      "Using cached pip-24.2-py3-none-any.whl (1.8 MB)\n",
      "Installing collected packages: pip\n",
      "  Attempting uninstall: pip\n",
      "    Found existing installation: pip 24.1.2\n",
      "    Uninstalling pip-24.1.2:\n",
      "      Successfully uninstalled pip-24.1.2\n",
      "Successfully installed pip-24.2\n",
      "Obtaining file:///content/drive/My%20Drive/UHN_Test/LLaVA\n",
      "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
      "  Checking if build backend supports build_editable ... \u001b[?25l\u001b[?25hdone\n",
      "  Getting requirements to build editable ... \u001b[?25l\u001b[?25hdone\n",
      "  Preparing editable metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
      "Collecting torch==2.1.2 (from llava==1.2.2.post1)\n",
      "  Downloading torch-2.1.2-cp310-cp310-manylinux1_x86_64.whl.metadata (25 kB)\n",
      "Collecting torchvision==0.16.2 (from llava==1.2.2.post1)\n",
      "  Downloading torchvision-0.16.2-cp310-cp310-manylinux1_x86_64.whl.metadata (6.6 kB)\n",
      "Collecting transformers==4.37.2 (from llava==1.2.2.post1)\n",
      "  Downloading transformers-4.37.2-py3-none-any.whl.metadata (129 kB)\n",
      "Collecting tokenizers==0.15.1 (from llava==1.2.2.post1)\n",
      "  Downloading tokenizers-0.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n",
      "Requirement already satisfied: sentencepiece==0.1.99 in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (0.1.99)\n",
      "Collecting shortuuid (from llava==1.2.2.post1)\n",
      "  Downloading shortuuid-1.0.13-py3-none-any.whl.metadata (5.8 kB)\n",
      "Collecting accelerate==0.21.0 (from llava==1.2.2.post1)\n",
      "  Downloading accelerate-0.21.0-py3-none-any.whl.metadata (17 kB)\n",
      "Collecting peft (from llava==1.2.2.post1)\n",
      "  Downloading peft-0.12.0-py3-none-any.whl.metadata (13 kB)\n",
      "Collecting bitsandbytes (from llava==1.2.2.post1)\n",
      "  Downloading bitsandbytes-0.43.3-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)\n",
      "Requirement already satisfied: pydantic in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (2.9.2)\n",
      "Collecting markdown2[all] (from llava==1.2.2.post1)\n",
      "  Downloading markdown2-2.5.0-py2.py3-none-any.whl.metadata (2.2 kB)\n",
      "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (1.26.4)\n",
      "Collecting scikit-learn==1.2.2 (from llava==1.2.2.post1)\n",
      "  Downloading scikit_learn-1.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n",
      "Collecting gradio==4.16.0 (from llava==1.2.2.post1)\n",
      "  Downloading gradio-4.16.0-py3-none-any.whl.metadata (15 kB)\n",
      "Collecting gradio-client==0.8.1 (from llava==1.2.2.post1)\n",
      "  Downloading gradio_client-0.8.1-py3-none-any.whl.metadata (7.1 kB)\n",
      "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (2.32.3)\n",
      "Collecting httpx==0.24.0 (from llava==1.2.2.post1)\n",
      "  Downloading httpx-0.24.0-py3-none-any.whl.metadata (8.1 kB)\n",
      "Collecting uvicorn (from llava==1.2.2.post1)\n",
      "  Downloading uvicorn-0.30.6-py3-none-any.whl.metadata (6.6 kB)\n",
      "Collecting fastapi (from llava==1.2.2.post1)\n",
      "  Downloading fastapi-0.115.0-py3-none-any.whl.metadata (27 kB)\n",
      "Collecting einops==0.6.1 (from llava==1.2.2.post1)\n",
      "  Downloading einops-0.6.1-py3-none-any.whl.metadata (12 kB)\n",
      "Collecting einops-exts==0.0.4 (from llava==1.2.2.post1)\n",
      "  Downloading einops_exts-0.0.4-py3-none-any.whl.metadata (621 bytes)\n",
      "Collecting timm==0.6.13 (from llava==1.2.2.post1)\n",
      "  Downloading timm-0.6.13-py3-none-any.whl.metadata (38 kB)\n",
      "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate==0.21.0->llava==1.2.2.post1) (24.1)\n",
      "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate==0.21.0->llava==1.2.2.post1) (5.9.5)\n",
      "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate==0.21.0->llava==1.2.2.post1) (6.0.2)\n",
      "Collecting aiofiles<24.0,>=22.0 (from gradio==4.16.0->llava==1.2.2.post1)\n",
      "  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)\n",
      "Requirement already satisfied: altair<6.0,>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (4.2.2)\n",
      "Collecting ffmpy (from gradio==4.16.0->llava==1.2.2.post1)\n",
      "  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)\n",
      "Requirement already satisfied: huggingface-hub>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (0.24.7)\n",
      "Requirement already satisfied: importlib-resources<7.0,>=1.3 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (6.4.5)\n",
      "Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (3.1.4)\n",
      "Requirement already satisfied: markupsafe~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (2.1.5)\n",
      "Requirement already satisfied: matplotlib~=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (3.7.1)\n",
      "Collecting orjson~=3.0 (from gradio==4.16.0->llava==1.2.2.post1)\n",
      "  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)\n",
      "Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (2.1.4)\n",
      "Requirement already satisfied: pillow<11.0,>=8.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (10.4.0)\n",
      "Collecting pydub (from gradio==4.16.0->llava==1.2.2.post1)\n",
      "  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)\n",
      "Collecting python-multipart (from gradio==4.16.0->llava==1.2.2.post1)\n",
      "  Downloading python_multipart-0.0.9-py3-none-any.whl.metadata (2.5 kB)\n",
      "Collecting ruff>=0.1.7 (from gradio==4.16.0->llava==1.2.2.post1)\n",
      "  Downloading ruff-0.6.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)\n",
      "Collecting semantic-version~=2.0 (from gradio==4.16.0->llava==1.2.2.post1)\n",
      "  Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)\n",
      "Collecting tomlkit==0.12.0 (from gradio==4.16.0->llava==1.2.2.post1)\n",
      "  Downloading tomlkit-0.12.0-py3-none-any.whl.metadata (2.7 kB)\n",
      "Requirement already satisfied: typer<1.0,>=0.9 in /usr/local/lib/python3.10/dist-packages (from typer[all]<1.0,>=0.9->gradio==4.16.0->llava==1.2.2.post1) (0.12.5)\n",
      "Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (4.12.2)\n",
      "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from gradio-client==0.8.1->llava==1.2.2.post1) (2024.6.1)\n",
      "Collecting websockets<12.0,>=10.0 (from gradio-client==0.8.1->llava==1.2.2.post1)\n",
      "  Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n",
      "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx==0.24.0->llava==1.2.2.post1) (2024.8.30)\n",
      "Collecting httpcore<0.18.0,>=0.15.0 (from httpx==0.24.0->llava==1.2.2.post1)\n",
      "  Downloading httpcore-0.17.3-py3-none-any.whl.metadata (18 kB)\n",
      "Requirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx==0.24.0->llava==1.2.2.post1) (3.10)\n",
      "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx==0.24.0->llava==1.2.2.post1) (1.3.1)\n",
      "Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-learn==1.2.2->llava==1.2.2.post1) (1.13.1)\n",
      "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn==1.2.2->llava==1.2.2.post1) (1.4.2)\n",
      "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn==1.2.2->llava==1.2.2.post1) (3.5.0)\n",
      "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch==2.1.2->llava==1.2.2.post1) (3.16.1)\n",
      "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch==2.1.2->llava==1.2.2.post1) (1.13.2)\n",
      "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch==2.1.2->llava==1.2.2.post1) (3.3)\n",
      "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.1.2->llava==1.2.2.post1)\n",
      "  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n",
      "Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.1.2->llava==1.2.2.post1)\n",
      "  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n",
      "Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.1.2->llava==1.2.2.post1)\n",
      "  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n",
      "Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch==2.1.2->llava==1.2.2.post1)\n",
      "  Downloading nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n",
      "Collecting nvidia-cublas-cu12==12.1.3.1 (from torch==2.1.2->llava==1.2.2.post1)\n",
      "  Downloading nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n",
      "Collecting nvidia-cufft-cu12==11.0.2.54 (from torch==2.1.2->llava==1.2.2.post1)\n",
      "  Downloading nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n",
      "Collecting nvidia-curand-cu12==10.3.2.106 (from torch==2.1.2->llava==1.2.2.post1)\n",
      "  Downloading nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n",
      "Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch==2.1.2->llava==1.2.2.post1)\n",
      "  Downloading nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n",
      "Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch==2.1.2->llava==1.2.2.post1)\n",
      "  Downloading nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n",
      "Collecting nvidia-nccl-cu12==2.18.1 (from torch==2.1.2->llava==1.2.2.post1)\n",
      "  Downloading nvidia_nccl_cu12-2.18.1-py3-none-manylinux1_x86_64.whl.metadata (1.8 kB)\n",
      "Collecting nvidia-nvtx-cu12==12.1.105 (from torch==2.1.2->llava==1.2.2.post1)\n",
      "  Downloading nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.7 kB)\n",
      "Collecting triton==2.1.0 (from torch==2.1.2->llava==1.2.2.post1)\n",
      "  Downloading triton-2.1.0-0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.3 kB)\n",
      "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.37.2->llava==1.2.2.post1) (2024.9.11)\n",
      "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.37.2->llava==1.2.2.post1) (0.4.5)\n",
      "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers==4.37.2->llava==1.2.2.post1) (4.66.5)\n",
      "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch==2.1.2->llava==1.2.2.post1)\n",
      "  Downloading nvidia_nvjitlink_cu12-12.6.68-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic->llava==1.2.2.post1) (0.7.0)\n",
      "Requirement already satisfied: pydantic-core==2.23.4 in /usr/local/lib/python3.10/dist-packages (from pydantic->llava==1.2.2.post1) (2.23.4)\n",
      "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn->llava==1.2.2.post1) (8.1.7)\n",
      "Collecting h11>=0.8 (from uvicorn->llava==1.2.2.post1)\n",
      "  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)\n",
      "Collecting starlette<0.39.0,>=0.37.2 (from fastapi->llava==1.2.2.post1)\n",
      "  Downloading starlette-0.38.5-py3-none-any.whl.metadata (6.0 kB)\n",
      "Requirement already satisfied: pygments>=2.7.3 in /usr/local/lib/python3.10/dist-packages (from markdown2[all]->llava==1.2.2.post1) (2.18.0)\n",
      "Collecting wavedrom (from markdown2[all]->llava==1.2.2.post1)\n",
      "  Downloading wavedrom-2.0.3.post3.tar.gz (137 kB)\n",
      "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
      "Collecting latex2mathml (from markdown2[all]->llava==1.2.2.post1)\n",
      "  Downloading latex2mathml-3.77.0-py3-none-any.whl.metadata (14 kB)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->llava==1.2.2.post1) (3.3.2)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->llava==1.2.2.post1) (2.0.7)\n",
      "Requirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio==4.16.0->llava==1.2.2.post1) (0.4)\n",
      "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio==4.16.0->llava==1.2.2.post1) (4.23.0)\n",
      "Requirement already satisfied: toolz in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio==4.16.0->llava==1.2.2.post1) (0.12.1)\n",
      "Requirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.10/dist-packages (from httpcore<0.18.0,>=0.15.0->httpx==0.24.0->llava==1.2.2.post1) (3.7.1)\n",
      "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==4.16.0->llava==1.2.2.post1) (1.3.0)\n",
      "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==4.16.0->llava==1.2.2.post1) (0.12.1)\n",
      "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==4.16.0->llava==1.2.2.post1) (4.53.1)\n",
      "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==4.16.0->llava==1.2.2.post1) (1.4.7)\n",
      "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==4.16.0->llava==1.2.2.post1) (3.1.4)\n",
      "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==4.16.0->llava==1.2.2.post1) (2.8.2)\n",
      "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio==4.16.0->llava==1.2.2.post1) (2024.2)\n",
      "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio==4.16.0->llava==1.2.2.post1) (2024.1)\n",
      "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.9->typer[all]<1.0,>=0.9->gradio==4.16.0->llava==1.2.2.post1) (1.5.4)\n",
      "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.9->typer[all]<1.0,>=0.9->gradio==4.16.0->llava==1.2.2.post1) (13.8.1)\n",
      "\u001b[33mWARNING: typer 0.12.5 does not provide the extra 'all'\u001b[0m\u001b[33m\n",
      "\u001b[0mRequirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch==2.1.2->llava==1.2.2.post1) (1.3.0)\n",
      "Collecting svgwrite (from wavedrom->markdown2[all]->llava==1.2.2.post1)\n",
      "  Downloading svgwrite-1.4.3-py3-none-any.whl.metadata (8.8 kB)\n",
      "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from wavedrom->markdown2[all]->llava==1.2.2.post1) (1.16.0)\n",
      "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5.0,>=3.0->httpcore<0.18.0,>=0.15.0->httpx==0.24.0->llava==1.2.2.post1) (1.2.2)\n",
      "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio==4.16.0->llava==1.2.2.post1) (24.2.0)\n",
      "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio==4.16.0->llava==1.2.2.post1) (2023.12.1)\n",
      "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio==4.16.0->llava==1.2.2.post1) (0.35.1)\n",
      "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio==4.16.0->llava==1.2.2.post1) (0.20.0)\n",
      "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0,>=0.9->typer[all]<1.0,>=0.9->gradio==4.16.0->llava==1.2.2.post1) (3.0.0)\n",
      "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.9->typer[all]<1.0,>=0.9->gradio==4.16.0->llava==1.2.2.post1) (0.1.2)\n",
      "Downloading accelerate-0.21.0-py3-none-any.whl (244 kB)\n",
      "Downloading einops-0.6.1-py3-none-any.whl (42 kB)\n",
      "Downloading einops_exts-0.0.4-py3-none-any.whl (3.9 kB)\n",
      "Downloading gradio-4.16.0-py3-none-any.whl (16.7 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.7/16.7 MB\u001b[0m \u001b[31m119.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading gradio_client-0.8.1-py3-none-any.whl (305 kB)\n",
      "Downloading httpx-0.24.0-py3-none-any.whl (75 kB)\n",
      "Downloading scikit_learn-1.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.6 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.6/9.6 MB\u001b[0m \u001b[31m167.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading timm-0.6.13-py3-none-any.whl (549 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m549.1/549.1 kB\u001b[0m \u001b[31m24.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading tokenizers-0.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m108.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading torch-2.1.2-cp310-cp310-manylinux1_x86_64.whl (670.2 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m670.2/670.2 MB\u001b[0m \u001b[31m28.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading torchvision-0.16.2-cp310-cp310-manylinux1_x86_64.whl (6.8 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m152.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading transformers-4.37.2-py3-none-any.whl (8.4 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.4/8.4 MB\u001b[0m \u001b[31m88.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.6/410.6 MB\u001b[0m \u001b[31m48.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.1/14.1 MB\u001b[0m \u001b[31m174.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m23.7/23.7 MB\u001b[0m \u001b[31m162.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m823.6/823.6 kB\u001b[0m \u001b[31m42.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m731.7/731.7 MB\u001b[0m \u001b[31m23.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.6/121.6 MB\u001b[0m \u001b[31m114.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.5/56.5 MB\u001b[0m \u001b[31m89.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.2/124.2 MB\u001b[0m \u001b[31m85.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m196.0/196.0 MB\u001b[0m \u001b[31m109.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_nccl_cu12-2.18.1-py3-none-manylinux1_x86_64.whl (209.8 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m209.8/209.8 MB\u001b[0m \u001b[31m40.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n",
      "Downloading tomlkit-0.12.0-py3-none-any.whl (37 kB)\n",
      "Downloading triton-2.1.0-0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (89.2 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m89.2/89.2 MB\u001b[0m \u001b[31m69.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading uvicorn-0.30.6-py3-none-any.whl (62 kB)\n",
      "Downloading bitsandbytes-0.43.3-py3-none-manylinux_2_24_x86_64.whl (137.5 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m137.5/137.5 MB\u001b[0m \u001b[31m109.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading fastapi-0.115.0-py3-none-any.whl (94 kB)\n",
      "Downloading peft-0.12.0-py3-none-any.whl (296 kB)\n",
      "Downloading shortuuid-1.0.13-py3-none-any.whl (10 kB)\n",
      "Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)\n",
      "Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n",
      "Downloading httpcore-0.17.3-py3-none-any.whl (74 kB)\n",
      "Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (141 kB)\n",
      "Downloading ruff-0.6.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.9 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.9/11.9 MB\u001b[0m \u001b[31m112.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n",
      "Downloading starlette-0.38.5-py3-none-any.whl (71 kB)\n",
      "Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n",
      "Downloading ffmpy-0.4.0-py3-none-any.whl (5.8 kB)\n",
      "Downloading latex2mathml-3.77.0-py3-none-any.whl (73 kB)\n",
      "Downloading markdown2-2.5.0-py2.py3-none-any.whl (47 kB)\n",
      "Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
      "Downloading python_multipart-0.0.9-py3-none-any.whl (22 kB)\n",
      "Downloading nvidia_nvjitlink_cu12-12.6.68-py3-none-manylinux2014_x86_64.whl (19.7 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.7/19.7 MB\u001b[0m \u001b[31m153.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading svgwrite-1.4.3-py3-none-any.whl (67 kB)\n",
      "Building wheels for collected packages: llava, wavedrom\n",
      "  Building editable for llava (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
      "  Created wheel for llava: filename=llava-1.2.2.post1-0.editable-py3-none-any.whl size=17882 sha256=cad8e6274bda4f7cd67b389afa6cd1db1a3aad3cfbdf6479b7b53a7225c0042e\n",
      "  Stored in directory: /tmp/pip-ephem-wheel-cache-zepfo7m4/wheels/c7/30/87/7e6fd0ab42125993e0f0fda735294a1aa7c337e2cc4b09d09b\n",
      "  Building wheel for wavedrom (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
      "  Created wheel for wavedrom: filename=wavedrom-2.0.3.post3-py2.py3-none-any.whl size=30052 sha256=cbee9cb59de3a847c5e2be6170ca77a2c80ccde546b55b3500d455c5812e4b6b\n",
      "  Stored in directory: /root/.cache/pip/wheels/9c/52/8c/38b454b42f712f325e26f633287484c7dc1ad469e1580c5954\n",
      "Successfully built llava wavedrom\n",
      "Installing collected packages: pydub, websockets, triton, tomlkit, svgwrite, shortuuid, semantic-version, ruff, python-multipart, orjson, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, markdown2, latex2mathml, h11, ffmpy, einops, aiofiles, wavedrom, uvicorn, starlette, scikit-learn, nvidia-cusparse-cu12, nvidia-cudnn-cu12, httpcore, einops-exts, tokenizers, nvidia-cusolver-cu12, httpx, fastapi, transformers, torch, gradio-client, torchvision, gradio, bitsandbytes, accelerate, timm, peft, llava\n",
      "  Attempting uninstall: nvidia-nccl-cu12\n",
      "    Found existing installation: nvidia-nccl-cu12 2.23.4\n",
      "    Uninstalling nvidia-nccl-cu12-2.23.4:\n",
      "      Successfully uninstalled nvidia-nccl-cu12-2.23.4\n",
      "  Attempting uninstall: einops\n",
      "    Found existing installation: einops 0.8.0\n",
      "    Uninstalling einops-0.8.0:\n",
      "      Successfully uninstalled einops-0.8.0\n",
      "  Attempting uninstall: scikit-learn\n",
      "    Found existing installation: scikit-learn 1.3.2\n",
      "    Uninstalling scikit-learn-1.3.2:\n",
      "      Successfully uninstalled scikit-learn-1.3.2\n",
      "  Attempting uninstall: tokenizers\n",
      "    Found existing installation: tokenizers 0.19.1\n",
      "    Uninstalling tokenizers-0.19.1:\n",
      "      Successfully uninstalled tokenizers-0.19.1\n",
      "  Attempting uninstall: transformers\n",
      "    Found existing installation: transformers 4.44.2\n",
      "    Uninstalling transformers-4.44.2:\n",
      "      Successfully uninstalled transformers-4.44.2\n",
      "  Attempting uninstall: torch\n",
      "    Found existing installation: torch 2.4.1+cu121\n",
      "    Uninstalling torch-2.4.1+cu121:\n",
      "      Successfully uninstalled torch-2.4.1+cu121\n",
      "  Attempting uninstall: torchvision\n",
      "    Found existing installation: torchvision 0.19.1+cu121\n",
      "    Uninstalling torchvision-0.19.1+cu121:\n",
      "      Successfully uninstalled torchvision-0.19.1+cu121\n",
      "  Attempting uninstall: accelerate\n",
      "    Found existing installation: accelerate 0.34.2\n",
      "    Uninstalling accelerate-0.34.2:\n",
      "      Successfully uninstalled accelerate-0.34.2\n",
      "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
      "torchaudio 2.4.1+cu121 requires torch==2.4.1, but you have torch 2.1.2 which is incompatible.\u001b[0m\u001b[31m\n",
      "\u001b[0mSuccessfully installed accelerate-0.21.0 aiofiles-23.2.1 bitsandbytes-0.43.3 einops-0.6.1 einops-exts-0.0.4 fastapi-0.115.0 ffmpy-0.4.0 gradio-4.16.0 gradio-client-0.8.1 h11-0.14.0 httpcore-0.17.3 httpx-0.24.0 latex2mathml-3.77.0 llava-1.2.2.post1 markdown2-2.5.0 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.18.1 nvidia-nvjitlink-cu12-12.6.68 nvidia-nvtx-cu12-12.1.105 orjson-3.10.7 peft-0.12.0 pydub-0.25.1 python-multipart-0.0.9 ruff-0.6.6 scikit-learn-1.2.2 semantic-version-2.10.0 shortuuid-1.0.13 starlette-0.38.5 svgwrite-1.4.3 timm-0.6.13 tokenizers-0.15.1 tomlkit-0.12.0 torch-2.1.2 torchvision-0.16.2 transformers-4.37.2 triton-2.1.0 uvicorn-0.30.6 wavedrom-2.0.3.post3 websockets-11.0.3\n"
     ]
    }
   ],
   "source": [
    "# The pip install -e . lets us install the repository in editable mode\n",
    "!git clone https://github.com/haotian-liu/LLaVA.git\n",
    "!cd LLaVA && pip install --upgrade pip && pip install -e ."
   ]
  },
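  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick, hedged check that the editable install took effect: `llava` should be importable, and the pinned versions from the install log (`torch` 2.1.2, `transformers` 4.37.2) should be active. Note that pip's `torchaudio` conflict warning above is expected, since LLaVA pins `torch==2.1.2` while Colab ships a newer torchaudio; it does not affect this workflow. If the old versions still show, restart the Colab runtime so the freshly installed packages are loaded."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sanity check: confirm the editable install is importable and the pins match the log\n",
    "from importlib.metadata import version\n",
    "\n",
    "import torch\n",
    "import transformers\n",
    "import llava  # resolves to the editable install in ./LLaVA\n",
    "\n",
    "print('torch:', torch.__version__)                # expected 2.1.2\n",
    "print('transformers:', transformers.__version__)  # expected 4.37.2\n",
    "print('llava:', version('llava'))                 # expected 1.2.2.post1"
   ]
  },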
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Obtaining file:///content/drive/MyDrive/UHN_Test/LLaVA\n",
      "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
      "  Checking if build backend supports build_editable ... \u001b[?25l\u001b[?25hdone\n",
      "  Getting requirements to build editable ... \u001b[?25l\u001b[?25hdone\n",
      "  Preparing editable metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
      "Requirement already satisfied: torch==2.1.2 in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (2.1.2)\n",
      "Requirement already satisfied: torchvision==0.16.2 in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (0.16.2)\n",
      "Requirement already satisfied: transformers==4.37.2 in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (4.37.2)\n",
      "Requirement already satisfied: tokenizers==0.15.1 in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (0.15.1)\n",
      "Requirement already satisfied: sentencepiece==0.1.99 in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (0.1.99)\n",
      "Requirement already satisfied: shortuuid in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (1.0.13)\n",
      "Requirement already satisfied: accelerate==0.21.0 in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (0.21.0)\n",
      "Requirement already satisfied: peft in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (0.12.0)\n",
      "Requirement already satisfied: bitsandbytes in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (0.43.3)\n",
      "Requirement already satisfied: pydantic in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (2.9.2)\n",
      "Requirement already satisfied: markdown2[all] in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (2.5.0)\n",
      "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (1.26.4)\n",
      "Requirement already satisfied: scikit-learn==1.2.2 in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (1.2.2)\n",
      "Requirement already satisfied: gradio==4.16.0 in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (4.16.0)\n",
      "Requirement already satisfied: gradio-client==0.8.1 in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (0.8.1)\n",
      "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (2.32.3)\n",
      "Requirement already satisfied: httpx==0.24.0 in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (0.24.0)\n",
      "Requirement already satisfied: uvicorn in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (0.30.6)\n",
      "Requirement already satisfied: fastapi in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (0.115.0)\n",
      "Requirement already satisfied: einops==0.6.1 in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (0.6.1)\n",
      "Requirement already satisfied: einops-exts==0.0.4 in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (0.0.4)\n",
      "Requirement already satisfied: timm==0.6.13 in /usr/local/lib/python3.10/dist-packages (from llava==1.2.2.post1) (0.6.13)\n",
      "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate==0.21.0->llava==1.2.2.post1) (24.1)\n",
      "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate==0.21.0->llava==1.2.2.post1) (5.9.5)\n",
      "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate==0.21.0->llava==1.2.2.post1) (6.0.2)\n",
      "Requirement already satisfied: aiofiles<24.0,>=22.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (23.2.1)\n",
      "Requirement already satisfied: altair<6.0,>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (4.2.2)\n",
      "Requirement already satisfied: ffmpy in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (0.4.0)\n",
      "Requirement already satisfied: huggingface-hub>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (0.24.7)\n",
      "Requirement already satisfied: importlib-resources<7.0,>=1.3 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (6.4.5)\n",
      "Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (3.1.4)\n",
      "Requirement already satisfied: markupsafe~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (2.1.5)\n",
      "Requirement already satisfied: matplotlib~=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (3.7.1)\n",
      "Requirement already satisfied: orjson~=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (3.10.7)\n",
      "Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (2.1.4)\n",
      "Requirement already satisfied: pillow<11.0,>=8.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (10.4.0)\n",
      "Requirement already satisfied: pydub in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (0.25.1)\n",
      "Requirement already satisfied: python-multipart in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (0.0.9)\n",
      "Requirement already satisfied: ruff>=0.1.7 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (0.6.6)\n",
      "Requirement already satisfied: semantic-version~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (2.10.0)\n",
      "Requirement already satisfied: tomlkit==0.12.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (0.12.0)\n",
      "Requirement already satisfied: typer<1.0,>=0.9 in /usr/local/lib/python3.10/dist-packages (from typer[all]<1.0,>=0.9->gradio==4.16.0->llava==1.2.2.post1) (0.12.5)\n",
      "Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.16.0->llava==1.2.2.post1) (4.12.2)\n",
      "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from gradio-client==0.8.1->llava==1.2.2.post1) (2024.6.1)\n",
      "Requirement already satisfied: websockets<12.0,>=10.0 in /usr/local/lib/python3.10/dist-packages (from gradio-client==0.8.1->llava==1.2.2.post1) (11.0.3)\n",
      "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx==0.24.0->llava==1.2.2.post1) (2024.8.30)\n",
      "Requirement already satisfied: httpcore<0.18.0,>=0.15.0 in /usr/local/lib/python3.10/dist-packages (from httpx==0.24.0->llava==1.2.2.post1) (0.17.3)\n",
      "Requirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx==0.24.0->llava==1.2.2.post1) (3.10)\n",
      "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx==0.24.0->llava==1.2.2.post1) (1.3.1)\n",
      "Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-learn==1.2.2->llava==1.2.2.post1) (1.13.1)\n",
      "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn==1.2.2->llava==1.2.2.post1) (1.4.2)\n",
      "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn==1.2.2->llava==1.2.2.post1) (3.5.0)\n",
      "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch==2.1.2->llava==1.2.2.post1) (3.16.1)\n",
      "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch==2.1.2->llava==1.2.2.post1) (1.13.2)\n",
      "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch==2.1.2->llava==1.2.2.post1) (3.3)\n",
      "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch==2.1.2->llava==1.2.2.post1) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch==2.1.2->llava==1.2.2.post1) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch==2.1.2->llava==1.2.2.post1) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch==2.1.2->llava==1.2.2.post1) (8.9.2.26)\n",
      "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch==2.1.2->llava==1.2.2.post1) (12.1.3.1)\n",
      "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch==2.1.2->llava==1.2.2.post1) (11.0.2.54)\n",
      "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch==2.1.2->llava==1.2.2.post1) (10.3.2.106)\n",
      "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch==2.1.2->llava==1.2.2.post1) (11.4.5.107)\n",
      "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch==2.1.2->llava==1.2.2.post1) (12.1.0.106)\n",
      "Requirement already satisfied: nvidia-nccl-cu12==2.18.1 in /usr/local/lib/python3.10/dist-packages (from torch==2.1.2->llava==1.2.2.post1) (2.18.1)\n",
      "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch==2.1.2->llava==1.2.2.post1) (12.1.105)\n",
      "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch==2.1.2->llava==1.2.2.post1) (2.1.0)\n",
      "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.37.2->llava==1.2.2.post1) (2024.9.11)\n",
      "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.37.2->llava==1.2.2.post1) (0.4.5)\n",
      "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers==4.37.2->llava==1.2.2.post1) (4.66.5)\n",
      "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch==2.1.2->llava==1.2.2.post1) (12.6.68)\n",
      "Collecting deepspeed==0.12.6 (from llava==1.2.2.post1)\n",
      "  Downloading deepspeed-0.12.6.tar.gz (1.2 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m53.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
      "Collecting ninja (from llava==1.2.2.post1)\n",
      "  Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl.metadata (5.3 kB)\n",
      "Collecting wandb (from llava==1.2.2.post1)\n",
      "  Downloading wandb-0.18.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)\n",
      "Collecting hjson (from deepspeed==0.12.6->llava==1.2.2.post1)\n",
      "  Downloading hjson-3.1.0-py3-none-any.whl.metadata (2.6 kB)\n",
      "Requirement already satisfied: py-cpuinfo in /usr/local/lib/python3.10/dist-packages (from deepspeed==0.12.6->llava==1.2.2.post1) (9.0.0)\n",
      "Collecting pynvml (from deepspeed==0.12.6->llava==1.2.2.post1)\n",
      "  Downloading pynvml-11.5.3-py3-none-any.whl.metadata (8.8 kB)\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic->llava==1.2.2.post1) (0.7.0)\n",
      "Requirement already satisfied: pydantic-core==2.23.4 in /usr/local/lib/python3.10/dist-packages (from pydantic->llava==1.2.2.post1) (2.23.4)\n",
      "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn->llava==1.2.2.post1) (8.1.7)\n",
      "Requirement already satisfied: h11>=0.8 in /usr/local/lib/python3.10/dist-packages (from uvicorn->llava==1.2.2.post1) (0.14.0)\n",
      "Requirement already satisfied: starlette<0.39.0,>=0.37.2 in /usr/local/lib/python3.10/dist-packages (from fastapi->llava==1.2.2.post1) (0.38.5)\n",
      "Requirement already satisfied: pygments>=2.7.3 in /usr/local/lib/python3.10/dist-packages (from markdown2[all]->llava==1.2.2.post1) (2.18.0)\n",
      "Requirement already satisfied: wavedrom in /usr/local/lib/python3.10/dist-packages (from markdown2[all]->llava==1.2.2.post1) (2.0.3.post3)\n",
      "Requirement already satisfied: latex2mathml in /usr/local/lib/python3.10/dist-packages (from markdown2[all]->llava==1.2.2.post1) (3.77.0)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->llava==1.2.2.post1) (3.3.2)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->llava==1.2.2.post1) (2.0.7)\n",
      "Collecting docker-pycreds>=0.4.0 (from wandb->llava==1.2.2.post1)\n",
      "  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl.metadata (1.8 kB)\n",
      "Collecting gitpython!=3.1.29,>=1.0.0 (from wandb->llava==1.2.2.post1)\n",
      "  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)\n",
      "Requirement already satisfied: platformdirs in /usr/local/lib/python3.10/dist-packages (from wandb->llava==1.2.2.post1) (4.3.6)\n",
      "Requirement already satisfied: protobuf!=4.21.0,!=5.28.0,<6,>=3.19.0 in /usr/local/lib/python3.10/dist-packages (from wandb->llava==1.2.2.post1) (3.20.3)\n",
      "Collecting sentry-sdk>=1.0.0 (from wandb->llava==1.2.2.post1)\n",
      "  Downloading sentry_sdk-2.14.0-py2.py3-none-any.whl.metadata (9.7 kB)\n",
      "Collecting setproctitle (from wandb->llava==1.2.2.post1)\n",
      "  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.9 kB)\n",
      "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from wandb->llava==1.2.2.post1) (71.0.4)\n",
      "Requirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio==4.16.0->llava==1.2.2.post1) (0.4)\n",
      "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio==4.16.0->llava==1.2.2.post1) (4.23.0)\n",
      "Requirement already satisfied: toolz in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio==4.16.0->llava==1.2.2.post1) (0.12.1)\n",
      "Requirement already satisfied: six>=1.4.0 in /usr/local/lib/python3.10/dist-packages (from docker-pycreds>=0.4.0->wandb->llava==1.2.2.post1) (1.16.0)\n",
      "Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.29,>=1.0.0->wandb->llava==1.2.2.post1)\n",
      "  Downloading gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)\n",
      "Requirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.10/dist-packages (from httpcore<0.18.0,>=0.15.0->httpx==0.24.0->llava==1.2.2.post1) (3.7.1)\n",
      "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==4.16.0->llava==1.2.2.post1) (1.3.0)\n",
      "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==4.16.0->llava==1.2.2.post1) (0.12.1)\n",
      "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==4.16.0->llava==1.2.2.post1) (4.53.1)\n",
      "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==4.16.0->llava==1.2.2.post1) (1.4.7)\n",
      "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==4.16.0->llava==1.2.2.post1) (3.1.4)\n",
      "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==4.16.0->llava==1.2.2.post1) (2.8.2)\n",
      "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio==4.16.0->llava==1.2.2.post1) (2024.2)\n",
      "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio==4.16.0->llava==1.2.2.post1) (2024.1)\n",
      "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.9->typer[all]<1.0,>=0.9->gradio==4.16.0->llava==1.2.2.post1) (1.5.4)\n",
      "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.9->typer[all]<1.0,>=0.9->gradio==4.16.0->llava==1.2.2.post1) (13.8.1)\n",
      "\u001b[33mWARNING: typer 0.12.5 does not provide the extra 'all'\u001b[0m\u001b[33m\n",
      "\u001b[0mRequirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch==2.1.2->llava==1.2.2.post1) (1.3.0)\n",
      "Requirement already satisfied: svgwrite in /usr/local/lib/python3.10/dist-packages (from wavedrom->markdown2[all]->llava==1.2.2.post1) (1.4.3)\n",
      "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5.0,>=3.0->httpcore<0.18.0,>=0.15.0->httpx==0.24.0->llava==1.2.2.post1) (1.2.2)\n",
      "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb->llava==1.2.2.post1)\n",
      "  Downloading smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)\n",
      "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio==4.16.0->llava==1.2.2.post1) (24.2.0)\n",
      "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio==4.16.0->llava==1.2.2.post1) (2023.12.1)\n",
      "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio==4.16.0->llava==1.2.2.post1) (0.35.1)\n",
      "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio==4.16.0->llava==1.2.2.post1) (0.20.0)\n",
      "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0,>=0.9->typer[all]<1.0,>=0.9->gradio==4.16.0->llava==1.2.2.post1) (3.0.0)\n",
      "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.9->typer[all]<1.0,>=0.9->gradio==4.16.0->llava==1.2.2.post1) (0.1.2)\n",
      "Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)\n",
      "Downloading wandb-0.18.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.7 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.7/9.7 MB\u001b[0m \u001b[31m159.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)\n",
      "Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)\n",
      "Downloading sentry_sdk-2.14.0-py2.py3-none-any.whl (311 kB)\n",
      "Downloading hjson-3.1.0-py3-none-any.whl (54 kB)\n",
      "Downloading pynvml-11.5.3-py3-none-any.whl (53 kB)\n",
      "Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)\n",
      "Downloading gitdb-4.0.11-py3-none-any.whl (62 kB)\n",
      "Downloading smmap-5.0.1-py3-none-any.whl (24 kB)\n",
      "Building wheels for collected packages: llava, deepspeed\n",
      "  Building editable for llava (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
      "  Created wheel for llava: filename=llava-1.2.2.post1-0.editable-py3-none-any.whl size=17886 sha256=0c72e21f800abe34f0b86903d580c1c30a7db7964aa5a4bc349da22a074e6c12\n",
      "  Stored in directory: /tmp/pip-ephem-wheel-cache-yfwv0kmp/wheels/fa/1d/85/c83c163007fce90e8a855668d1d5cf33e92c0127727708c615\n",
      "  Building wheel for deepspeed (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
      "  Created wheel for deepspeed: filename=deepspeed-0.12.6-py3-none-any.whl size=1306725 sha256=46e2005ecb563fb979dda831591cabbc611bb2e85b86e86532feee75d3affaf4\n",
      "  Stored in directory: /root/.cache/pip/wheels/a3/dc/a2/f585faaed4dec84108916dcc8e8a7c129a216df8202ca32984\n",
      "Successfully built llava deepspeed\n",
      "Installing collected packages: ninja, hjson, smmap, setproctitle, sentry-sdk, pynvml, docker-pycreds, gitdb, gitpython, wandb, deepspeed, llava\n",
      "  Attempting uninstall: llava\n",
      "    Found existing installation: llava 1.2.2.post1\n",
      "    Uninstalling llava-1.2.2.post1:\n",
      "      Successfully uninstalled llava-1.2.2.post1\n",
      "Successfully installed deepspeed-0.12.6 docker-pycreds-0.4.0 gitdb-4.0.11 gitpython-3.1.43 hjson-3.1.0 llava-1.2.2.post1 ninja-1.11.1.1 pynvml-11.5.3 sentry-sdk-2.14.0 setproctitle-1.3.3 smmap-5.0.1 wandb-0.18.1\n",
      "Collecting flash-attn\n",
      "  Downloading flash_attn-2.6.3.tar.gz (2.6 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m95.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
      "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from flash-attn) (2.1.2)\n",
      "Requirement already satisfied: einops in /usr/local/lib/python3.10/dist-packages (from flash-attn) (0.6.1)\n",
      "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.16.1)\n",
      "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (4.12.2)\n",
      "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (1.13.2)\n",
      "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.3)\n",
      "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.1.4)\n",
      "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (2024.6.1)\n",
      "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (8.9.2.26)\n",
      "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.3.1)\n",
      "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (11.0.2.54)\n",
      "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (10.3.2.106)\n",
      "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (11.4.5.107)\n",
      "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.0.106)\n",
      "Requirement already satisfied: nvidia-nccl-cu12==2.18.1 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (2.18.1)\n",
      "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.105)\n",
      "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (2.1.0)\n",
      "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch->flash-attn) (12.6.68)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->flash-attn) (2.1.5)\n",
      "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->flash-attn) (1.3.0)\n",
      "Building wheels for collected packages: flash-attn\n",
      "  Building wheel for flash-attn (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
      "  Created wheel for flash-attn: filename=flash_attn-2.6.3-cp310-cp310-linux_x86_64.whl size=186975250 sha256=edf1fa13f79c5dc6adc092eb70d5218d954a5d03040383f513c57b8df60d5707\n",
      "  Stored in directory: /root/.cache/pip/wheels/7e/e3/c3/89c7a2f3c4adc07cd1c675f8bb7b9ad4d18f64a72bccdfe826\n",
      "Successfully built flash-attn\n",
      "Installing collected packages: flash-attn\n",
      "Successfully installed flash-attn-2.6.3\n"
     ]
    }
   ],
   "source": [
    "# Install Deepspeed\n",
    "!cd LLaVA && pip install -e \".[train]\"\n",
    "!pip install flash-attn --no-build-isolation"
   ]
  },
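  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional sanity check (a minimal sketch, not part of the original run):\n",
    "# confirm the packages installed above resolve to the expected versions.\n",
    "from importlib.metadata import version\n",
    "\n",
    "for dist in ('llava', 'flash-attn', 'deepspeed'):\n",
    "    print(dist, version(dist))"
   ]
  },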
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: deepspeed in /usr/local/lib/python3.10/dist-packages (0.12.6)\n",
      "Requirement already satisfied: hjson in /usr/local/lib/python3.10/dist-packages (from deepspeed) (3.1.0)\n",
      "Requirement already satisfied: ninja in /usr/local/lib/python3.10/dist-packages (from deepspeed) (1.11.1.1)\n",
      "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from deepspeed) (1.26.4)\n",
      "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from deepspeed) (24.1)\n",
      "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from deepspeed) (5.9.5)\n",
      "Requirement already satisfied: py-cpuinfo in /usr/local/lib/python3.10/dist-packages (from deepspeed) (9.0.0)\n",
      "Requirement already satisfied: pydantic in /usr/local/lib/python3.10/dist-packages (from deepspeed) (2.9.2)\n",
      "Requirement already satisfied: pynvml in /usr/local/lib/python3.10/dist-packages (from deepspeed) (11.5.3)\n",
      "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from deepspeed) (2.1.2)\n",
      "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from deepspeed) (4.66.5)\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic->deepspeed) (0.7.0)\n",
      "Requirement already satisfied: pydantic-core==2.23.4 in /usr/local/lib/python3.10/dist-packages (from pydantic->deepspeed) (2.23.4)\n",
      "Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic->deepspeed) (4.12.2)\n",
      "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (3.16.1)\n",
      "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (1.13.2)\n",
      "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (3.3)\n",
      "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (3.1.4)\n",
      "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (2024.6.1)\n",
      "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (8.9.2.26)\n",
      "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (12.1.3.1)\n",
      "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (11.0.2.54)\n",
      "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (10.3.2.106)\n",
      "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (11.4.5.107)\n",
      "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (12.1.0.106)\n",
      "Requirement already satisfied: nvidia-nccl-cu12==2.18.1 in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (2.18.1)\n",
      "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (12.1.105)\n",
      "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (2.1.0)\n",
      "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch->deepspeed) (12.6.68)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->deepspeed) (2.1.5)\n",
      "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->deepspeed) (1.3.0)\n"
     ]
    }
   ],
   "source": [
    "!pip install deepspeed"
   ]
  },
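  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional (not in the original run): DeepSpeed's ds_report CLI summarizes\n",
    "# the detected CUDA toolchain and which fused ops are compatible/installed.\n",
    "!ds_report"
   ]
  },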
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Finetune LLaVA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (2.17.0)\n",
      "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (1.4.0)\n",
      "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (1.64.1)\n",
      "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (3.7)\n",
      "Requirement already satisfied: numpy>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (1.26.4)\n",
      "Requirement already satisfied: protobuf!=4.24.0,<5.0.0,>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (3.20.3)\n",
      "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (71.0.4)\n",
      "Requirement already satisfied: six>1.9 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (1.16.0)\n",
      "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (0.7.2)\n",
      "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (3.0.4)\n",
      "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard) (2.1.5)\n"
     ]
    }
   ],
   "source": [
    "!pip install -r requirements.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "\n",
    "torch.cuda.empty_cache() # Clear unnecessary GPU cache to free up memory\n",
    "\n",
    "# Set environment variable to reduce memory fragmentation issues\n",
    "import os\n",
    "os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'"
   ]
  },
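  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The next cell launches LoRA finetuning of `liuhaotian/llava-v1.5-7b` through the DeepSpeed launcher with ZeRO-3 (`LLaVA/scripts/zero3.json`). Key settings visible in the launched command: LoRA rank 128 with alpha 256, learning rate 2e-4 on a cosine schedule with 3% warmup, a separate projector learning rate of 2e-5, one epoch at per-device batch size 16, bf16/tf32 precision with gradient checkpointing, and the `openai/clip-vit-large-patch14-336` vision tower behind an `mlp2x_gelu` projector."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional (an illustrative check, not part of the original run): report\n",
    "# free GPU memory before launching the DeepSpeed job below.\n",
    "import torch\n",
    "\n",
    "free, total = torch.cuda.mem_get_info()\n",
    "print(f'GPU memory free: {free / 1e9:.1f} GB of {total / 1e9:.1f} GB')"
   ]
  },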
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2024-09-21 00:51:18,626] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
      "2024-09-21 00:51:21.974892: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
      "2024-09-21 00:51:21.995877: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
      "2024-09-21 00:51:22.002161: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
      "2024-09-21 00:51:23.229205: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
      "[2024-09-21 00:51:24,226] [WARNING] [runner.py:202:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only.\n",
      "[2024-09-21 00:51:24,227] [INFO] [runner.py:571:main] cmd = /usr/bin/python3 -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMF19 --master_addr=127.0.0.1 --master_port=29500 --enable_each_rank_log=None LLaVA/llava/train/train_mem.py --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 --deepspeed LLaVA/scripts/zero3.json --model_name_or_path liuhaotian/llava-v1.5-7b --version v1 --data_path ./dataset/train/dataset.json --image_folder ./dataset/images --vision_tower openai/clip-vit-large-patch14-336 --mm_projector_type mlp2x_gelu --mm_vision_select_layer -2 --mm_use_im_start_end False --mm_use_im_patch_token False --image_aspect_ratio pad --group_by_modality_length True --bf16 True --output_dir ./checkpoints/llava-v1.5-7b-task-lora --num_train_epochs 1 --per_device_train_batch_size 16 --per_device_eval_batch_size 4 --gradient_accumulation_steps 1 --evaluation_strategy no --save_strategy steps --save_steps 50000 --save_total_limit 1 --learning_rate 2e-4 --weight_decay 0. --warmup_ratio 0.03 --lr_scheduler_type cosine --logging_steps 1 --tf32 True --model_max_length 2048 --gradient_checkpointing True --dataloader_num_workers 4 --lazy_preprocess True --report_to tensorboard\n",
      "[2024-09-21 00:51:26,638] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
      "2024-09-21 00:51:29.531920: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
      "2024-09-21 00:51:29.553953: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
      "2024-09-21 00:51:29.560405: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
      "2024-09-21 00:51:30.780090: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
      "[2024-09-21 00:51:31,740] [INFO] [launch.py:138:main] 0 NV_LIBNCCL_DEV_PACKAGE=libnccl-dev=2.19.3-1+cuda12.2\n",
      "[2024-09-21 00:51:31,740] [INFO] [launch.py:138:main] 0 NV_LIBNCCL_DEV_PACKAGE_VERSION=2.19.3-1\n",
      "[2024-09-21 00:51:31,740] [INFO] [launch.py:138:main] 0 NCCL_VERSION=2.19.3-1\n",
      "[2024-09-21 00:51:31,740] [INFO] [launch.py:138:main] 0 NV_LIBNCCL_DEV_PACKAGE_NAME=libnccl-dev\n",
      "[2024-09-21 00:51:31,740] [INFO] [launch.py:138:main] 0 NV_LIBNCCL_PACKAGE=libnccl2=2.19.3-1+cuda12.2\n",
      "[2024-09-21 00:51:31,740] [INFO] [launch.py:138:main] 0 NV_LIBNCCL_PACKAGE_NAME=libnccl2\n",
      "[2024-09-21 00:51:31,740] [INFO] [launch.py:138:main] 0 NV_LIBNCCL_PACKAGE_VERSION=2.19.3-1\n",
      "[2024-09-21 00:51:31,740] [INFO] [launch.py:145:main] WORLD INFO DICT: {'localhost': [0]}\n",
      "[2024-09-21 00:51:31,741] [INFO] [launch.py:151:main] nnodes=1, num_local_procs=1, node_rank=0\n",
      "[2024-09-21 00:51:31,741] [INFO] [launch.py:162:main] global_rank_mapping=defaultdict(<class 'list'>, {'localhost': [0]})\n",
      "[2024-09-21 00:51:31,741] [INFO] [launch.py:163:main] dist_world_size=1\n",
      "[2024-09-21 00:51:31,741] [INFO] [launch.py:165:main] Setting CUDA_VISIBLE_DEVICES=0\n",
      "[2024-09-21 00:51:37,799] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
      "2024-09-21 00:51:39.117444: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
      "2024-09-21 00:51:39.137805: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
      "2024-09-21 00:51:39.144019: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
      "2024-09-21 00:51:40.356528: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
      "[2024-09-21 00:51:44,630] [INFO] [comm.py:637:init_distributed] cdb=None\n",
      "[2024-09-21 00:51:44,630] [INFO] [comm.py:668:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl\n",
      "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1150: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
      "  warnings.warn(\n",
      "config.json: 100% 1.16k/1.16k [00:00<00:00, 8.35MB/s]\n",
      "You are using a model of type llava to instantiate a model of type llava_llama. This is not supported for all configurations of models and can yield errors.\n",
      "pytorch_model.bin.index.json: 100% 27.1k/27.1k [00:00<00:00, 36.0MB/s]\n",
      "Downloading shards:   0% 0/2 [00:00<?, ?it/s]\n",
      "pytorch_model-00001-of-00002.bin:   0% 0.00/9.98G [00:00<?, ?B/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   0% 21.0M/9.98G [00:00<00:48, 204MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   1% 62.9M/9.98G [00:00<00:31, 312MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   1% 105M/9.98G [00:00<00:27, 357MB/s] \u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   1% 147M/9.98G [00:00<00:26, 374MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   2% 189M/9.98G [00:00<00:26, 374MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   2% 231M/9.98G [00:00<00:26, 368MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   3% 273M/9.98G [00:00<00:25, 382MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   3% 315M/9.98G [00:00<00:24, 392MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   4% 357M/9.98G [00:00<00:24, 397MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   4% 398M/9.98G [00:01<00:23, 399MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   4% 440M/9.98G [00:01<00:23, 403MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   5% 482M/9.98G [00:01<00:23, 405MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   5% 524M/9.98G [00:01<00:23, 396MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   6% 566M/9.98G [00:01<00:23, 400MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   6% 608M/9.98G [00:01<00:23, 403MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   7% 650M/9.98G [00:01<00:23, 404MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   7% 692M/9.98G [00:01<00:22, 406MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   7% 734M/9.98G [00:01<00:23, 398MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   8% 776M/9.98G [00:01<00:22, 402MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   8% 818M/9.98G [00:02<00:22, 404MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   9% 860M/9.98G [00:02<00:23, 393MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   9% 902M/9.98G [00:02<00:23, 389MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   9% 944M/9.98G [00:02<00:23, 386MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  10% 986M/9.98G [00:02<00:24, 370MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  10% 1.03G/9.98G [00:02<00:24, 363MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  11% 1.07G/9.98G [00:02<00:24, 365MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  11% 1.11G/9.98G [00:02<00:24, 359MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  12% 1.15G/9.98G [00:03<00:24, 353MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  12% 1.20G/9.98G [00:03<00:24, 361MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  12% 1.24G/9.98G [00:03<00:23, 365MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  13% 1.28G/9.98G [00:03<00:23, 364MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  13% 1.32G/9.98G [00:03<00:23, 366MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  14% 1.36G/9.98G [00:03<00:23, 367MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  14% 1.41G/9.98G [00:03<00:23, 364MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  15% 1.45G/9.98G [00:03<00:23, 364MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  15% 1.49G/9.98G [00:03<00:22, 371MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  15% 1.53G/9.98G [00:04<00:23, 365MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  16% 1.57G/9.98G [00:04<00:22, 368MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  16% 1.61G/9.98G [00:04<00:22, 372MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  17% 1.66G/9.98G [00:04<00:22, 371MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  17% 1.70G/9.98G [00:04<00:22, 371MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  17% 1.74G/9.98G [00:04<00:21, 378MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  18% 1.78G/9.98G [00:04<00:21, 384MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  18% 1.82G/9.98G [00:04<00:21, 379MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  19% 1.87G/9.98G [00:04<00:21, 380MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  19% 1.91G/9.98G [00:05<00:20, 385MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  20% 1.95G/9.98G [00:05<00:20, 386MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  20% 1.99G/9.98G [00:05<00:20, 381MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  20% 2.03G/9.98G [00:05<00:21, 365MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  21% 2.08G/9.98G [00:05<00:27, 282MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  21% 2.11G/9.98G [00:05<00:27, 281MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  21% 2.14G/9.98G [00:05<00:27, 288MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  22% 2.18G/9.98G [00:05<00:25, 301MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  22% 2.22G/9.98G [00:06<00:24, 315MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  23% 2.26G/9.98G [00:06<00:23, 329MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  23% 2.31G/9.98G [00:06<00:22, 339MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  24% 2.35G/9.98G [00:06<00:21, 354MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  24% 2.39G/9.98G [00:06<00:20, 367MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  24% 2.43G/9.98G [00:06<00:20, 370MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  25% 2.47G/9.98G [00:06<00:20, 367MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  25% 2.52G/9.98G [00:06<00:20, 370MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  26% 2.56G/9.98G [00:06<00:19, 380MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  26% 2.60G/9.98G [00:07<00:19, 383MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  26% 2.64G/9.98G [00:07<00:18, 387MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  27% 2.68G/9.98G [00:07<00:18, 392MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  27% 2.73G/9.98G [00:07<00:18, 395MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  28% 2.77G/9.98G [00:07<00:18, 399MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  28% 2.81G/9.98G [00:07<00:18, 392MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  29% 2.85G/9.98G [00:07<00:18, 388MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  29% 2.89G/9.98G [00:07<00:18, 390MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  29% 2.94G/9.98G [00:07<00:18, 388MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  30% 2.98G/9.98G [00:08<00:17, 390MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  30% 3.02G/9.98G [00:08<00:18, 386MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  31% 3.06G/9.98G [00:08<00:18, 381MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  31% 3.10G/9.98G [00:08<00:18, 382MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  32% 3.15G/9.98G [00:08<00:17, 385MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  32% 3.19G/9.98G [00:08<00:17, 378MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  32% 3.23G/9.98G [00:08<00:17, 379MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  33% 3.27G/9.98G [00:08<00:17, 376MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  33% 3.31G/9.98G [00:08<00:17, 372MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  34% 3.36G/9.98G [00:09<00:17, 374MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  34% 3.40G/9.98G [00:09<00:17, 375MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  34% 3.44G/9.98G [00:09<00:17, 379MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  35% 3.48G/9.98G [00:09<00:17, 381MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  35% 3.52G/9.98G [00:09<00:16, 382MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  36% 3.57G/9.98G [00:09<00:16, 382MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  36% 3.61G/9.98G [00:09<00:16, 385MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  37% 3.65G/9.98G [00:09<00:16, 385MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  37% 3.69G/9.98G [00:09<00:16, 384MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  37% 3.73G/9.98G [00:10<00:16, 381MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  38% 3.77G/9.98G [00:10<00:16, 372MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  38% 3.82G/9.98G [00:10<00:16, 367MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  39% 3.86G/9.98G [00:10<00:16, 367MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  39% 3.90G/9.98G [00:10<00:16, 369MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  40% 3.94G/9.98G [00:10<00:25, 237MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  40% 3.98G/9.98G [00:10<00:22, 267MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  40% 4.03G/9.98G [00:11<00:20, 291MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  41% 4.07G/9.98G [00:11<00:18, 313MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  41% 4.11G/9.98G [00:11<00:18, 326MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  42% 4.15G/9.98G [00:11<00:17, 339MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  42% 4.19G/9.98G [00:11<00:16, 345MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  42% 4.24G/9.98G [00:11<00:16, 352MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  43% 4.28G/9.98G [00:11<00:15, 356MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  43% 4.32G/9.98G [00:11<00:15, 361MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  44% 4.36G/9.98G [00:11<00:15, 366MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  44% 4.40G/9.98G [00:12<00:17, 319MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  45% 4.45G/9.98G [00:12<00:21, 262MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  45% 4.48G/9.98G [00:12<00:23, 232MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  45% 4.51G/9.98G [00:12<00:26, 207MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  46% 4.54G/9.98G [00:12<00:28, 190MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  46% 4.56G/9.98G [00:13<00:30, 179MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  46% 4.58G/9.98G [00:13<00:30, 179MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  46% 4.60G/9.98G [00:13<00:32, 166MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  46% 4.62G/9.98G [00:13<00:34, 157MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  47% 4.65G/9.98G [00:13<00:34, 154MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  47% 4.67G/9.98G [00:13<00:36, 144MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  47% 4.69G/9.98G [00:13<00:35, 148MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  47% 4.71G/9.98G [00:14<00:34, 155MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  47% 4.73G/9.98G [00:14<00:33, 157MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  48% 4.75G/9.98G [00:14<00:32, 161MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  48% 4.77G/9.98G [00:14<00:33, 157MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  48% 4.79G/9.98G [00:14<00:32, 160MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  48% 4.81G/9.98G [00:14<00:32, 159MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  48% 4.83G/9.98G [00:14<00:32, 156MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  49% 4.85G/9.98G [00:15<00:34, 146MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  49% 4.88G/9.98G [00:15<00:36, 139MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  49% 4.90G/9.98G [00:15<00:41, 123MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  49% 4.92G/9.98G [00:15<00:46, 109MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  50% 4.94G/9.98G [00:15<00:54, 91.8MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  50% 4.98G/9.98G [00:16<00:35, 140MB/s] \u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  50% 5.02G/9.98G [00:16<00:26, 188MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  51% 5.06G/9.98G [00:16<00:21, 230MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  51% 5.11G/9.98G [00:16<00:18, 266MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  52% 5.15G/9.98G [00:16<00:16, 293MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  52% 5.19G/9.98G [00:16<00:15, 315MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  52% 5.23G/9.98G [00:16<00:14, 333MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  53% 5.27G/9.98G [00:16<00:13, 345MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  53% 5.32G/9.98G [00:16<00:13, 354MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  54% 5.36G/9.98G [00:17<00:13, 355MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  54% 5.40G/9.98G [00:17<00:12, 361MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  55% 5.44G/9.98G [00:17<00:12, 372MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  55% 5.48G/9.98G [00:17<00:11, 376MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  55% 5.53G/9.98G [00:17<00:11, 373MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  56% 5.57G/9.98G [00:17<00:11, 371MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  56% 5.61G/9.98G [00:17<00:11, 365MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  57% 5.65G/9.98G [00:17<00:11, 362MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  57% 5.69G/9.98G [00:17<00:11, 359MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  57% 5.74G/9.98G [00:18<00:11, 358MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  58% 5.78G/9.98G [00:18<00:11, 360MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  58% 5.82G/9.98G [00:18<00:11, 358MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  59% 5.86G/9.98G [00:18<00:11, 361MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  59% 5.90G/9.98G [00:18<00:11, 360MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  60% 5.95G/9.98G [00:18<00:11, 360MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  60% 5.99G/9.98G [00:18<00:11, 352MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  60% 6.03G/9.98G [00:18<00:11, 346MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  61% 6.07G/9.98G [00:19<00:11, 351MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  61% 6.11G/9.98G [00:19<00:10, 353MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  62% 6.16G/9.98G [00:19<00:10, 358MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  62% 6.20G/9.98G [00:19<00:10, 358MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  63% 6.24G/9.98G [00:19<00:10, 361MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  63% 6.28G/9.98G [00:19<00:10, 362MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  63% 6.32G/9.98G [00:19<00:10, 362MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  64% 6.36G/9.98G [00:19<00:09, 364MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  64% 6.41G/9.98G [00:19<00:09, 362MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  65% 6.45G/9.98G [00:20<00:09, 362MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  65% 6.49G/9.98G [00:20<00:09, 364MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  65% 6.53G/9.98G [00:20<00:09, 364MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  66% 6.57G/9.98G [00:20<00:09, 362MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  66% 6.62G/9.98G [00:20<00:09, 366MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  67% 6.66G/9.98G [00:20<00:09, 365MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  67% 6.70G/9.98G [00:20<00:09, 363MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  68% 6.74G/9.98G [00:20<00:08, 367MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  68% 6.78G/9.98G [00:21<00:08, 368MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  68% 6.83G/9.98G [00:21<00:08, 369MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  69% 6.87G/9.98G [00:21<00:08, 370MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  69% 6.91G/9.98G [00:21<00:08, 372MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  70% 6.95G/9.98G [00:21<00:10, 296MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  70% 6.99G/9.98G [00:21<00:11, 254MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  70% 7.03G/9.98G [00:21<00:12, 235MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  71% 7.06G/9.98G [00:22<00:13, 212MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  71% 7.09G/9.98G [00:22<00:14, 196MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  71% 7.11G/9.98G [00:22<00:15, 179MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  71% 7.13G/9.98G [00:22<00:16, 169MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  72% 7.15G/9.98G [00:22<00:18, 156MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  72% 7.17G/9.98G [00:23<00:20, 136MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  72% 7.19G/9.98G [00:23<00:23, 118MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  72% 7.21G/9.98G [00:23<00:29, 93.6MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  73% 7.24G/9.98G [00:23<00:29, 91.9MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  73% 7.26G/9.98G [00:23<00:26, 104MB/s] \u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  73% 7.28G/9.98G [00:24<00:22, 121MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  73% 7.31G/9.98G [00:24<00:17, 155MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  74% 7.35G/9.98G [00:24<00:12, 205MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  74% 7.39G/9.98G [00:24<00:10, 246MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  75% 7.43G/9.98G [00:24<00:09, 274MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  75% 7.48G/9.98G [00:24<00:08, 301MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  75% 7.52G/9.98G [00:24<00:07, 324MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  76% 7.56G/9.98G [00:24<00:07, 342MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  76% 7.60G/9.98G [00:24<00:06, 349MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  77% 7.64G/9.98G [00:25<00:06, 352MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  77% 7.69G/9.98G [00:25<00:06, 353MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  77% 7.73G/9.98G [00:25<00:06, 356MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  78% 7.77G/9.98G [00:25<00:06, 356MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  78% 7.81G/9.98G [00:25<00:05, 365MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  79% 7.85G/9.98G [00:25<00:05, 371MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  79% 7.90G/9.98G [00:25<00:05, 364MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  80% 7.94G/9.98G [00:25<00:05, 364MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  80% 7.98G/9.98G [00:26<00:05, 365MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  80% 8.02G/9.98G [00:26<00:05, 364MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  81% 8.06G/9.98G [00:26<00:05, 365MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  81% 8.11G/9.98G [00:26<00:05, 367MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  82% 8.15G/9.98G [00:26<00:07, 237MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  82% 8.19G/9.98G [00:26<00:06, 260MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  83% 8.23G/9.98G [00:26<00:06, 285MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  83% 8.27G/9.98G [00:27<00:05, 308MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  83% 8.32G/9.98G [00:27<00:05, 328MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  84% 8.36G/9.98G [00:27<00:04, 333MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  84% 8.40G/9.98G [00:27<00:04, 340MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  85% 8.44G/9.98G [00:27<00:04, 346MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  85% 8.48G/9.98G [00:27<00:04, 348MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  85% 8.52G/9.98G [00:27<00:04, 351MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  86% 8.57G/9.98G [00:27<00:04, 352MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  86% 8.61G/9.98G [00:27<00:03, 359MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  87% 8.65G/9.98G [00:28<00:03, 355MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  87% 8.69G/9.98G [00:28<00:03, 355MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  88% 8.73G/9.98G [00:28<00:03, 362MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  88% 8.78G/9.98G [00:28<00:03, 364MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  88% 8.82G/9.98G [00:28<00:03, 367MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  89% 8.86G/9.98G [00:28<00:03, 364MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  89% 8.90G/9.98G [00:28<00:02, 366MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  90% 8.94G/9.98G [00:28<00:02, 355MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  90% 8.99G/9.98G [00:29<00:02, 356MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  90% 9.03G/9.98G [00:29<00:02, 362MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  91% 9.07G/9.98G [00:29<00:02, 367MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  91% 9.11G/9.98G [00:29<00:02, 366MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  92% 9.15G/9.98G [00:29<00:02, 365MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  92% 9.20G/9.98G [00:29<00:02, 371MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  93% 9.24G/9.98G [00:29<00:02, 362MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  93% 9.28G/9.98G [00:29<00:01, 362MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  93% 9.32G/9.98G [00:29<00:01, 359MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  94% 9.36G/9.98G [00:30<00:01, 361MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  94% 9.41G/9.98G [00:30<00:01, 306MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  95% 9.45G/9.98G [00:30<00:01, 272MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  95% 9.49G/9.98G [00:30<00:01, 285MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  96% 9.53G/9.98G [00:30<00:01, 300MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  96% 9.57G/9.98G [00:30<00:01, 316MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  96% 9.62G/9.98G [00:30<00:01, 311MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  97% 9.66G/9.98G [00:31<00:01, 318MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  97% 9.70G/9.98G [00:31<00:00, 329MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  98% 9.74G/9.98G [00:31<00:00, 344MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  98% 9.78G/9.98G [00:31<00:00, 356MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  98% 9.83G/9.98G [00:31<00:00, 359MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  99% 9.87G/9.98G [00:31<00:00, 363MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  99% 9.91G/9.98G [00:31<00:00, 361MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin: 100% 9.98G/9.98G [00:31<00:00, 312MB/s]\n",
      "Downloading shards:  50% 1/2 [00:32<00:32, 32.07s/it]\n",
      "pytorch_model-00002-of-00002.bin:   0% 0.00/3.54G [00:00<?, ?B/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:   1% 31.5M/3.54G [00:00<00:11, 314MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:   2% 73.4M/3.54G [00:00<00:09, 347MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:   3% 115M/3.54G [00:00<00:09, 356MB/s] \u001b[A\n",
      "pytorch_model-00002-of-00002.bin:   4% 157M/3.54G [00:00<00:09, 357MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:   6% 199M/3.54G [00:00<00:09, 355MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:   7% 241M/3.54G [00:00<00:09, 362MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:   8% 283M/3.54G [00:00<00:08, 365MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:   9% 325M/3.54G [00:00<00:10, 310MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  10% 367M/3.54G [00:01<00:10, 312MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  12% 409M/3.54G [00:01<00:11, 285MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  12% 440M/3.54G [00:01<00:10, 282MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  13% 472M/3.54G [00:01<00:10, 281MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  14% 503M/3.54G [00:01<00:10, 289MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  15% 545M/3.54G [00:01<00:09, 305MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  17% 587M/3.54G [00:01<00:09, 321MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  18% 629M/3.54G [00:01<00:08, 334MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  19% 671M/3.54G [00:02<00:08, 341MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  20% 713M/3.54G [00:02<00:08, 349MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  21% 755M/3.54G [00:02<00:07, 355MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  22% 797M/3.54G [00:02<00:07, 360MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  24% 839M/3.54G [00:02<00:07, 349MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  25% 881M/3.54G [00:02<00:07, 344MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  26% 923M/3.54G [00:02<00:07, 351MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  27% 965M/3.54G [00:02<00:07, 356MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  28% 1.01G/3.54G [00:03<00:07, 358MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  30% 1.05G/3.54G [00:03<00:07, 356MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  31% 1.09G/3.54G [00:03<00:07, 312MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  32% 1.13G/3.54G [00:03<00:07, 324MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  33% 1.17G/3.54G [00:03<00:07, 332MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  34% 1.22G/3.54G [00:03<00:07, 322MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  36% 1.26G/3.54G [00:03<00:07, 319MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  37% 1.30G/3.54G [00:03<00:07, 294MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  38% 1.34G/3.54G [00:04<00:07, 312MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  39% 1.38G/3.54G [00:04<00:07, 296MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  40% 1.43G/3.54G [00:04<00:06, 306MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  41% 1.46G/3.54G [00:05<00:15, 134MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  42% 1.49G/3.54G [00:06<00:32, 62.6MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  43% 1.51G/3.54G [00:06<00:30, 67.0MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  44% 1.55G/3.54G [00:06<00:20, 95.1MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  45% 1.59G/3.54G [00:06<00:15, 127MB/s] \u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  46% 1.64G/3.54G [00:06<00:11, 162MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  47% 1.68G/3.54G [00:06<00:09, 198MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  49% 1.72G/3.54G [00:07<00:07, 229MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  50% 1.76G/3.54G [00:07<00:06, 257MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  51% 1.80G/3.54G [00:07<00:06, 284MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  52% 1.85G/3.54G [00:07<00:05, 302MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  53% 1.89G/3.54G [00:07<00:05, 311MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  54% 1.93G/3.54G [00:07<00:05, 295MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  56% 1.97G/3.54G [00:07<00:05, 293MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  57% 2.01G/3.54G [00:07<00:04, 309MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  58% 2.06G/3.54G [00:08<00:04, 327MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  59% 2.10G/3.54G [00:08<00:04, 336MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  60% 2.14G/3.54G [00:08<00:04, 315MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  62% 2.18G/3.54G [00:08<00:04, 297MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  63% 2.22G/3.54G [00:08<00:04, 308MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  64% 2.26G/3.54G [00:08<00:04, 318MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  65% 2.31G/3.54G [00:08<00:03, 321MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  66% 2.35G/3.54G [00:09<00:03, 324MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  67% 2.39G/3.54G [00:09<00:03, 326MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  69% 2.43G/3.54G [00:09<00:03, 327MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  70% 2.47G/3.54G [00:09<00:03, 330MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  71% 2.52G/3.54G [00:09<00:03, 331MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  72% 2.56G/3.54G [00:09<00:02, 336MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  73% 2.60G/3.54G [00:09<00:02, 340MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  75% 2.64G/3.54G [00:09<00:02, 342MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  76% 2.68G/3.54G [00:10<00:03, 279MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  77% 2.73G/3.54G [00:10<00:02, 297MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  78% 2.77G/3.54G [00:10<00:02, 314MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  79% 2.81G/3.54G [00:10<00:02, 324MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  81% 2.85G/3.54G [00:10<00:02, 261MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  82% 2.89G/3.54G [00:10<00:02, 285MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  83% 2.94G/3.54G [00:10<00:02, 302MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  84% 2.98G/3.54G [00:11<00:01, 318MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  85% 3.02G/3.54G [00:11<00:01, 322MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  86% 3.06G/3.54G [00:11<00:01, 315MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  88% 3.10G/3.54G [00:11<00:01, 312MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  89% 3.15G/3.54G [00:11<00:01, 304MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  90% 3.18G/3.54G [00:11<00:01, 295MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  91% 3.21G/3.54G [00:11<00:01, 287MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  91% 3.24G/3.54G [00:12<00:02, 115MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  93% 3.28G/3.54G [00:12<00:01, 147MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  94% 3.32G/3.54G [00:12<00:01, 181MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  95% 3.37G/3.54G [00:12<00:00, 216MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  96% 3.41G/3.54G [00:13<00:00, 250MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  97% 3.45G/3.54G [00:13<00:00, 283MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  99% 3.49G/3.54G [00:13<00:00, 309MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin: 100% 3.54G/3.54G [00:13<00:00, 260MB/s]\n",
      "Downloading shards: 100% 2/2 [00:45<00:00, 22.89s/it]\n",
      "You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.\n",
      "config.json: 100% 4.76k/4.76k [00:00<00:00, 27.6MB/s]\n",
      "[2024-09-21 00:52:33,557] [INFO] [partition_parameters.py:348:__exit__] finished initializing model - num_params = 295, num_elems = 6.76B\n",
      "Loading checkpoint shards:   0% 0/2 [00:00<?, ?it/s]/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly.  To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
      "  return self.fget.__get__(instance, owner)()\n",
      "Loading checkpoint shards: 100% 2/2 [00:06<00:00,  3.46s/it]\n",
      "generation_config.json: 100% 124/124 [00:00<00:00, 883kB/s]\n",
      "Adding LoRA adapters...\n",
      "tokenizer_config.json: 100% 749/749 [00:00<00:00, 6.76MB/s]\n",
      "tokenizer.model: 100% 500k/500k [00:00<00:00, 177MB/s]\n",
      "special_tokens_map.json: 100% 438/438 [00:00<00:00, 3.84MB/s]\n",
      "preprocessor_config.json: 100% 316/316 [00:00<00:00, 2.55MB/s]\n",
      "pytorch_model.bin: 100% 1.71G/1.71G [00:04<00:00, 347MB/s]\n",
      "[2024-09-21 00:52:51,336] [INFO] [partition_parameters.py:348:__exit__] finished initializing model - num_params = 686, num_elems = 7.06B\n",
      "Formatting inputs...Skip in lazy mode\n",
      "/usr/local/lib/python3.10/dist-packages/transformers/deepspeed.py:23: FutureWarning: transformers.deepspeed module is deprecated and will be removed in a future version. Please import deepspeed modules directly from transformers.integrations\n",
      "  warnings.warn(\n",
      "Parameter Offload: Total persistent parameters: 599040 in 312 params\n",
      "  0% 0/130 [00:00<?, ?it/s]/usr/lib/python3.10/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n",
      "  self.pid = os.fork()\n",
      "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.\n",
      "  warnings.warn(\n",
      "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:61: UserWarning: None of the inputs have requires_grad=True. Gradients will be None\n",
      "  warnings.warn(\n",
      "{'loss': 1.9207, 'learning_rate': 5e-05, 'epoch': 0.01}\n",
      "{'loss': 2.118, 'learning_rate': 0.0001, 'epoch': 0.02}\n",
      "{'loss': 1.6691, 'learning_rate': 0.00015000000000000001, 'epoch': 0.02}\n",
      "{'loss': 1.0591, 'learning_rate': 0.0002, 'epoch': 0.03}\n",
      "{'loss': 1.1013, 'learning_rate': 0.00019996891820008164, 'epoch': 0.04}\n",
      "{'loss': 0.7172, 'learning_rate': 0.00019987569212189224, 'epoch': 0.05}\n",
      "{'loss': 0.818, 'learning_rate': 0.00019972037971811802, 'epoch': 0.05}\n",
      "{'loss': 0.7755, 'learning_rate': 0.00019950307753654017, 'epoch': 0.06}\n",
      "{'loss': 0.7291, 'learning_rate': 0.00019922392066001722, 'epoch': 0.07}\n",
      "{'loss': 0.7732, 'learning_rate': 0.00019888308262251285, 'epoch': 0.08}\n",
      "{'loss': 0.7987, 'learning_rate': 0.00019848077530122083, 'epoch': 0.08}\n",
      "{'loss': 0.7909, 'learning_rate': 0.00019801724878485438, 'epoch': 0.09}\n",
      "{'loss': 0.8374, 'learning_rate': 0.00019749279121818235, 'epoch': 0.1}\n",
      "{'loss': 0.6982, 'learning_rate': 0.0001969077286229078, 'epoch': 0.11}\n",
      "{'loss': 0.6, 'learning_rate': 0.0001962624246950012, 'epoch': 0.12}\n",
      "{'loss': 0.5954, 'learning_rate': 0.0001955572805786141, 'epoch': 0.12}\n",
      "{'loss': 0.6166, 'learning_rate': 0.0001947927346167132, 'epoch': 0.13}\n",
      "{'loss': 0.5259, 'learning_rate': 0.00019396926207859084, 'epoch': 0.14}\n",
      "{'loss': 0.7272, 'learning_rate': 0.00019308737486442045, 'epoch': 0.15}\n",
      "{'loss': 0.6852, 'learning_rate': 0.00019214762118704076, 'epoch': 0.15}\n",
      "{'loss': 0.6652, 'learning_rate': 0.00019115058523116733, 'epoch': 0.16}\n",
      "{'loss': 0.6103, 'learning_rate': 0.0001900968867902419, 'epoch': 0.17}\n",
      "{'loss': 0.6218, 'learning_rate': 0.0001889871808811469, 'epoch': 0.18}\n",
      "{'loss': 0.6948, 'learning_rate': 0.00018782215733702286, 'epoch': 0.18}\n",
      "{'loss': 0.511, 'learning_rate': 0.00018660254037844388, 'epoch': 0.19}\n",
      "{'loss': 0.6502, 'learning_rate': 0.00018532908816321558, 'epoch': 0.2}\n",
      "{'loss': 0.7126, 'learning_rate': 0.00018400259231507717, 'epoch': 0.21}\n",
      "{'loss': 0.8326, 'learning_rate': 0.0001826238774315995, 'epoch': 0.22}\n",
      "{'loss': 0.5867, 'learning_rate': 0.00018119380057158568, 'epoch': 0.22}\n",
      "{'loss': 0.7719, 'learning_rate': 0.00017971325072229226, 'epoch': 0.23}\n",
      "{'loss': 0.621, 'learning_rate': 0.000178183148246803, 'epoch': 0.24}\n",
      "{'loss': 0.5924, 'learning_rate': 0.0001766044443118978, 'epoch': 0.25}\n",
      "{'loss': 0.4965, 'learning_rate': 0.00017497812029677344, 'epoch': 0.25}\n",
      "{'loss': 0.5869, 'learning_rate': 0.00017330518718298264, 'epoch': 0.26}\n",
      "{'loss': 0.7057, 'learning_rate': 0.00017158668492597186, 'epoch': 0.27}\n",
      "{'loss': 0.5441, 'learning_rate': 0.00016982368180860728, 'epoch': 0.28}\n",
      "{'loss': 0.5765, 'learning_rate': 0.00016801727377709194, 'epoch': 0.28}\n",
      "{'loss': 0.673, 'learning_rate': 0.00016616858375968595, 'epoch': 0.29}\n",
      "{'loss': 0.6578, 'learning_rate': 0.00016427876096865394, 'epoch': 0.3}\n",
      "{'loss': 0.59, 'learning_rate': 0.00016234898018587337, 'epoch': 0.31}\n",
      "{'loss': 0.4679, 'learning_rate': 0.00016038044103254775, 'epoch': 0.32}\n",
      "{'loss': 0.4859, 'learning_rate': 0.000158374367223479, 'epoch': 0.32}\n",
      "{'loss': 0.5695, 'learning_rate': 0.0001563320058063622, 'epoch': 0.33}\n",
      "{'loss': 0.5171, 'learning_rate': 0.00015425462638657595, 'epoch': 0.34}\n",
      "{'loss': 0.4349, 'learning_rate': 0.0001521435203379498, 'epoch': 0.35}\n",
      "{'loss': 0.593, 'learning_rate': 0.00015000000000000001, 'epoch': 0.35}\n",
      "{'loss': 0.6236, 'learning_rate': 0.00014782539786213183, 'epoch': 0.36}\n",
      "{'loss': 0.6262, 'learning_rate': 0.0001456210657353163, 'epoch': 0.37}\n",
      "{'loss': 0.5783, 'learning_rate': 0.00014338837391175582, 'epoch': 0.38}\n",
      "{'loss': 0.5118, 'learning_rate': 0.00014112871031306119, 'epoch': 0.38}\n",
      "{'loss': 0.5476, 'learning_rate': 0.00013884347962746948, 'epoch': 0.39}\n",
      "{'loss': 0.5761, 'learning_rate': 0.00013653410243663952, 'epoch': 0.4}\n",
      "{'loss': 0.5885, 'learning_rate': 0.00013420201433256689, 'epoch': 0.41}\n",
      "{'loss': 0.6564, 'learning_rate': 0.00013184866502516845, 'epoch': 0.42}\n",
      "{'loss': 0.739, 'learning_rate': 0.00012947551744109043, 'epoch': 0.42}\n",
      "{'loss': 0.76, 'learning_rate': 0.00012708404681430053, 'epoch': 0.43}\n",
      "{'loss': 0.5222, 'learning_rate': 0.00012467573976902935, 'epoch': 0.44}\n",
      "{'loss': 0.5721, 'learning_rate': 0.00012225209339563145, 'epoch': 0.45}\n",
      "{'loss': 0.6089, 'learning_rate': 0.00011981461431993977, 'epoch': 0.45}\n",
      "{'loss': 0.6067, 'learning_rate': 0.00011736481776669306, 'epoch': 0.46}\n",
      "{'loss': 0.5093, 'learning_rate': 0.00011490422661761744, 'epoch': 0.47}\n",
      "{'loss': 0.6065, 'learning_rate': 0.00011243437046474853, 'epoch': 0.48}\n",
      "{'loss': 0.4474, 'learning_rate': 0.00010995678465958168, 'epoch': 0.48}\n",
      "{'loss': 0.55, 'learning_rate': 0.00010747300935864243, 'epoch': 0.49}\n",
      "{'loss': 0.4878, 'learning_rate': 0.00010498458856606972, 'epoch': 0.5}\n",
      "{'loss': 0.4153, 'learning_rate': 0.0001024930691738073, 'epoch': 0.51}\n",
      "{'loss': 0.5653, 'learning_rate': 0.0001, 'epoch': 0.52}\n",
      "{'loss': 0.4938, 'learning_rate': 9.750693082619273e-05, 'epoch': 0.52}\n",
      "{'loss': 0.4816, 'learning_rate': 9.501541143393028e-05, 'epoch': 0.53}\n",
      "{'loss': 0.4389, 'learning_rate': 9.252699064135758e-05, 'epoch': 0.54}\n",
      "{'loss': 0.5041, 'learning_rate': 9.004321534041835e-05, 'epoch': 0.55}\n",
      "{'loss': 0.4546, 'learning_rate': 8.756562953525152e-05, 'epoch': 0.55}\n",
      "{'loss': 0.5348, 'learning_rate': 8.509577338238255e-05, 'epoch': 0.56}\n",
      "{'loss': 0.6939, 'learning_rate': 8.263518223330697e-05, 'epoch': 0.57}\n",
      "{'loss': 0.6287, 'learning_rate': 8.018538568006027e-05, 'epoch': 0.58}\n",
      "{'loss': 0.7004, 'learning_rate': 7.774790660436858e-05, 'epoch': 0.58}\n",
      "{'loss': 0.5614, 'learning_rate': 7.532426023097063e-05, 'epoch': 0.59}\n",
      "{'loss': 0.5321, 'learning_rate': 7.291595318569951e-05, 'epoch': 0.6}\n",
      "{'loss': 0.428, 'learning_rate': 7.052448255890957e-05, 'epoch': 0.61}\n",
      "{'loss': 0.5208, 'learning_rate': 6.815133497483157e-05, 'epoch': 0.62}\n",
      "{'loss': 0.3875, 'learning_rate': 6.579798566743314e-05, 'epoch': 0.62}\n",
      "{'loss': 0.4374, 'learning_rate': 6.34658975633605e-05, 'epoch': 0.63}\n",
      "{'loss': 0.5944, 'learning_rate': 6.115652037253053e-05, 'epoch': 0.64}\n",
      "{'loss': 0.5426, 'learning_rate': 5.887128968693887e-05, 'epoch': 0.65}\n",
      "{'loss': 0.5236, 'learning_rate': 5.6611626088244194e-05, 'epoch': 0.65}\n",
      "{'loss': 0.5215, 'learning_rate': 5.43789342646837e-05, 'epoch': 0.66}\n",
      "{'loss': 0.6276, 'learning_rate': 5.217460213786821e-05, 'epoch': 0.67}\n",
      "{'loss': 0.6398, 'learning_rate': 5.000000000000002e-05, 'epoch': 0.68}\n",
      "{'loss': 0.5767, 'learning_rate': 4.78564796620502e-05, 'epoch': 0.68}\n",
      "{'loss': 0.5475, 'learning_rate': 4.574537361342407e-05, 'epoch': 0.69}\n",
      "{'loss': 0.6272, 'learning_rate': 4.3667994193637796e-05, 'epoch': 0.7}\n",
      "{'loss': 0.6999, 'learning_rate': 4.1625632776521037e-05, 'epoch': 0.71}\n",
      "{'loss': 0.5685, 'learning_rate': 3.961955896745224e-05, 'epoch': 0.72}\n",
      "{'loss': 0.4134, 'learning_rate': 3.7651019814126654e-05, 'epoch': 0.72}\n",
      "{'loss': 0.5588, 'learning_rate': 3.5721239031346066e-05, 'epoch': 0.73}\n",
      "{'loss': 0.5292, 'learning_rate': 3.383141624031408e-05, 'epoch': 0.74}\n",
      "{'loss': 0.5415, 'learning_rate': 3.198272622290804e-05, 'epoch': 0.75}\n",
      "{'loss': 0.5875, 'learning_rate': 3.0176318191392726e-05, 'epoch': 0.75}\n",
      "{'loss': 0.5756, 'learning_rate': 2.8413315074028158e-05, 'epoch': 0.76}\n",
      "{'loss': 0.5324, 'learning_rate': 2.669481281701739e-05, 'epoch': 0.77}\n",
      "{'loss': 0.5035, 'learning_rate': 2.502187970322657e-05, 'epoch': 0.78}\n",
      "{'loss': 0.5883, 'learning_rate': 2.339555568810221e-05, 'epoch': 0.78}\n",
      "{'loss': 0.3632, 'learning_rate': 2.181685175319702e-05, 'epoch': 0.79}\n",
      "{'loss': 0.4206, 'learning_rate': 2.0286749277707782e-05, 'epoch': 0.8}\n",
      "{'loss': 0.4867, 'learning_rate': 1.880619942841435e-05, 'epoch': 0.81}\n",
      "{'loss': 0.5097, 'learning_rate': 1.7376122568400532e-05, 'epoch': 0.82}\n",
      "{'loss': 0.4196, 'learning_rate': 1.5997407684922862e-05, 'epoch': 0.82}\n",
      "{'loss': 0.4502, 'learning_rate': 1.467091183678444e-05, 'epoch': 0.83}\n",
      "{'loss': 0.4994, 'learning_rate': 1.339745962155613e-05, 'epoch': 0.84}\n",
      "{'loss': 0.5945, 'learning_rate': 1.2177842662977135e-05, 'epoch': 0.85}\n",
      "{'loss': 0.6632, 'learning_rate': 1.1012819118853147e-05, 'epoch': 0.85}\n",
      "{'loss': 0.5267, 'learning_rate': 9.903113209758096e-06, 'epoch': 0.86}\n",
      "{'loss': 0.4464, 'learning_rate': 8.849414768832687e-06, 'epoch': 0.87}\n",
      "{'loss': 0.5007, 'learning_rate': 7.852378812959227e-06, 'epoch': 0.88}\n",
      "{'loss': 0.4689, 'learning_rate': 6.9126251355795864e-06, 'epoch': 0.88}\n",
      "{'loss': 0.6377, 'learning_rate': 6.030737921409169e-06, 'epoch': 0.89}\n",
      "{'loss': 0.4218, 'learning_rate': 5.20726538328683e-06, 'epoch': 0.9}\n",
      "{'loss': 0.4671, 'learning_rate': 4.442719421385922e-06, 'epoch': 0.91}\n",
      "{'loss': 0.7304, 'learning_rate': 3.7375753049987973e-06, 'epoch': 0.92}\n",
      "{'loss': 0.4949, 'learning_rate': 3.092271377092215e-06, 'epoch': 0.92}\n",
      "{'loss': 0.4776, 'learning_rate': 2.5072087818176382e-06, 'epoch': 0.93}\n",
      "{'loss': 0.4822, 'learning_rate': 1.9827512151456173e-06, 'epoch': 0.94}\n",
      "{'loss': 0.4209, 'learning_rate': 1.5192246987791981e-06, 'epoch': 0.95}\n",
      "{'loss': 0.5444, 'learning_rate': 1.1169173774871478e-06, 'epoch': 0.95}\n",
      "{'loss': 0.4981, 'learning_rate': 7.760793399827937e-07, 'epoch': 0.96}\n",
      "{'loss': 0.5573, 'learning_rate': 4.969224634598591e-07, 'epoch': 0.97}\n",
      "{'loss': 0.5132, 'learning_rate': 2.7962028188198706e-07, 'epoch': 0.98}\n",
      "{'loss': 0.6259, 'learning_rate': 1.2430787810776555e-07, 'epoch': 0.98}\n",
      "{'loss': 0.6139, 'learning_rate': 3.1081799918375454e-08, 'epoch': 0.99}\n",
      " 99% 129/130 [04:23<00:01,  1.90s/it]Invalidate trace cache @ step 308: expected module 4, but got module 3\n",
      "{'loss': 0.6752, 'learning_rate': 0.0, 'epoch': 1.0}\n",
      "{'train_runtime': 265.7004, 'train_samples_per_second': 7.787, 'train_steps_per_second': 0.489, 'train_loss': 0.615638983478913, 'epoch': 1.0}\n",
      "100% 130/130 [04:25<00:00,  2.04s/it]\n",
      "Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\n",
      "Non-default generation parameters: {'max_length': 4096}\n",
      "You are using a model of type llava to instantiate a model of type llava_llama. This is not supported for all configurations of models and can yield errors.\n",
      "[2024-09-21 00:57:27,100] [INFO] [launch.py:347:main] Process 8577 exits successfully.\n"
     ]
    }
   ],
   "source": [
    "!deepspeed LLaVA/llava/train/train_mem.py \\\n",
    "    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \\\n",
    "    --deepspeed LLaVA/scripts/zero3.json \\\n",
    "    --model_name_or_path liuhaotian/llava-v1.5-7b \\\n",
    "    --version v1 \\\n",
    "    --data_path ./dataset_train/train/train_dataset.json \\\n",
    "    --image_folder ./dataset_train/images \\\n",
    "    --vision_tower openai/clip-vit-large-patch14-336 \\\n",
    "    --mm_projector_type mlp2x_gelu \\\n",
    "    --mm_vision_select_layer -2 \\\n",
    "    --mm_use_im_start_end False \\\n",
    "    --mm_use_im_patch_token False \\\n",
    "    --image_aspect_ratio pad \\\n",
    "    --group_by_modality_length True \\\n",
    "    --bf16 True \\\n",
    "    --output_dir ./checkpoints/llava-v1.5-7b-task-lora \\\n",
    "    --num_train_epochs 1 \\\n",
    "    --per_device_train_batch_size 16 \\\n",
    "    --per_device_eval_batch_size 4 \\\n",
    "    --gradient_accumulation_steps 1 \\\n",
    "    --evaluation_strategy \"no\" \\\n",
    "    --save_strategy \"steps\" \\\n",
    "    --save_steps 50000 \\\n",
    "    --save_total_limit 1 \\\n",
    "    --learning_rate 2e-4 \\\n",
    "    --weight_decay 0. \\\n",
    "    --warmup_ratio 0.03 \\\n",
    "    --lr_scheduler_type \"cosine\" \\\n",
    "    --logging_steps 1 \\\n",
    "    --tf32 True \\\n",
    "    --model_max_length 2048 \\\n",
    "    --gradient_checkpointing True \\\n",
    "    --dataloader_num_workers 4 \\\n",
    "    --lazy_preprocess True \\\n",
    "    --report_to tensorboard"
   ]
  },
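  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Training metrics are also written to TensorBoard (`--report_to tensorboard`). The cell below is an optional, minimal sketch for inspecting the loss curve from inside Colab; it assumes the Hugging Face Trainer's default layout, which places event files in a `runs/` subfolder under the output directory."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional: visualize the training loss logged above.\n",
    "# Assumes event files under output_dir (default HF Trainer layout: output_dir/runs/...).\n",
    "%load_ext tensorboard\n",
    "%tensorboard --logdir ./checkpoints/llava-v1.5-7b-task-lora"
   ]
  },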
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nvcc: NVIDIA (R) Cuda compiler driver\n",
      "Copyright (c) 2005-2023 NVIDIA Corporation\n",
      "Built on Tue_Aug_15_22:02:13_PDT_2023\n",
      "Cuda compilation tools, release 12.2, V12.2.140\n",
      "Build cuda_12.2.r12.2/compiler.33191640_0\n"
     ]
    }
   ],
   "source": [
    "!nvcc --version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.1.2+cu121\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "print(torch.__version__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fri Sep 20 19:44:44 2024       \n",
      "+---------------------------------------------------------------------------------------+\n",
      "| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |\n",
      "|-----------------------------------------+----------------------+----------------------+\n",
      "| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
      "| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |\n",
      "|                                         |                      |               MIG M. |\n",
      "|=========================================+======================+======================|\n",
      "|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |\n",
      "| N/A   36C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |\n",
      "|                                         |                      |                  N/A |\n",
      "+-----------------------------------------+----------------------+----------------------+\n",
      "                                                                                         \n",
      "+---------------------------------------------------------------------------------------+\n",
      "| Processes:                                                                            |\n",
      "|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |\n",
      "|        ID   ID                                                             Usage      |\n",
      "|=======================================================================================|\n",
      "|  No running processes found                                                           |\n",
      "+---------------------------------------------------------------------------------------+\n"
     ]
    }
   ],
   "source": [
    "!nvidia-smi"
   ]
  },
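  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick programmatic complement to the `nvcc` and `nvidia-smi` checks above, the cell below confirms that PyTorch itself can see the GPU."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sanity check: confirm PyTorch can see the CUDA device reported by nvidia-smi\n",
    "import torch\n",
    "print('CUDA available:', torch.cuda.is_available())\n",
    "if torch.cuda.is_available():\n",
    "    print('Device:', torch.cuda.get_device_name(0))\n",
    "    print('Total memory (GB):', torch.cuda.get_device_properties(0).total_memory / 1e9)"
   ]
  },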
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Generate Predictions using Trained Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2024-09-21 01:59:18,379] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
      "2024-09-21 01:59:19.727775: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
      "2024-09-21 01:59:19.749533: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
      "2024-09-21 01:59:19.756030: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
      "2024-09-21 01:59:21.018312: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
      "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1150: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
      "  warnings.warn(\n",
      "tokenizer_config.json: 100% 749/749 [00:00<00:00, 4.55MB/s]\n",
      "tokenizer.model: 100% 500k/500k [00:00<00:00, 66.4MB/s]\n",
      "special_tokens_map.json: 100% 438/438 [00:00<00:00, 3.30MB/s]\n",
      "Loading LLaVA from base model...\n",
      "pytorch_model.bin.index.json: 100% 27.1k/27.1k [00:00<00:00, 88.0MB/s]\n",
      "Downloading shards:   0% 0/2 [00:00<?, ?it/s]\n",
      "pytorch_model-00001-of-00002.bin:   0% 0.00/9.98G [00:00<?, ?B/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   0% 21.0M/9.98G [00:00<01:23, 119MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   1% 62.9M/9.98G [00:00<00:42, 236MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   1% 115M/9.98G [00:00<00:30, 328MB/s] \u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   2% 168M/9.98G [00:00<00:25, 378MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   2% 220M/9.98G [00:00<00:23, 413MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   3% 273M/9.98G [00:00<00:22, 433MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   3% 325M/9.98G [00:00<00:21, 452MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   4% 377M/9.98G [00:00<00:20, 465MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   4% 430M/9.98G [00:01<00:20, 469MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   5% 482M/9.98G [00:01<00:20, 458MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   5% 535M/9.98G [00:01<00:21, 436MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   6% 587M/9.98G [00:01<00:21, 436MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   6% 640M/9.98G [00:01<00:20, 452MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   7% 692M/9.98G [00:01<00:20, 464MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   7% 744M/9.98G [00:01<00:19, 472MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   8% 797M/9.98G [00:01<00:19, 478MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   9% 849M/9.98G [00:01<00:18, 483MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:   9% 902M/9.98G [00:02<00:19, 476MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  10% 954M/9.98G [00:02<00:19, 471MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  10% 1.01G/9.98G [00:02<00:19, 467MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  11% 1.06G/9.98G [00:02<00:20, 443MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  11% 1.11G/9.98G [00:02<00:20, 441MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  12% 1.16G/9.98G [00:02<00:22, 396MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  12% 1.21G/9.98G [00:02<00:22, 384MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  13% 1.26G/9.98G [00:02<00:22, 393MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  13% 1.31G/9.98G [00:03<00:21, 406MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  14% 1.35G/9.98G [00:03<00:22, 376MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  14% 1.39G/9.98G [00:03<00:23, 369MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  14% 1.44G/9.98G [00:03<00:24, 349MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  15% 1.49G/9.98G [00:03<00:21, 390MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  15% 1.54G/9.98G [00:03<00:20, 416MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  16% 1.59G/9.98G [00:03<00:19, 420MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  17% 1.65G/9.98G [00:03<00:19, 427MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  17% 1.70G/9.98G [00:04<00:18, 439MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  18% 1.75G/9.98G [00:04<00:18, 444MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  18% 1.80G/9.98G [00:04<00:18, 447MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  19% 1.86G/9.98G [00:04<00:18, 432MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  19% 1.91G/9.98G [00:04<00:19, 414MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  20% 1.96G/9.98G [00:04<00:18, 423MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  20% 2.01G/9.98G [00:04<00:19, 400MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  21% 2.06G/9.98G [00:04<00:20, 387MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  21% 2.10G/9.98G [00:05<00:20, 385MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  22% 2.15G/9.98G [00:05<00:19, 405MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  22% 2.20G/9.98G [00:05<00:18, 419MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  23% 2.25G/9.98G [00:05<00:18, 409MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  23% 2.30G/9.98G [00:05<00:19, 388MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  23% 2.34G/9.98G [00:05<00:20, 376MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  24% 2.38G/9.98G [00:05<00:20, 380MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  24% 2.42G/9.98G [00:05<00:21, 357MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  25% 2.46G/9.98G [00:05<00:20, 365MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  25% 2.52G/9.98G [00:06<00:19, 386MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  26% 2.57G/9.98G [00:06<00:18, 402MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  26% 2.62G/9.98G [00:06<00:17, 412MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  27% 2.66G/9.98G [00:06<00:17, 414MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  27% 2.72G/9.98G [00:06<00:17, 425MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  28% 2.77G/9.98G [00:06<00:16, 427MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  28% 2.82G/9.98G [00:06<00:16, 436MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  29% 2.87G/9.98G [00:06<00:17, 417MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  29% 2.92G/9.98G [00:07<00:17, 415MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  30% 2.96G/9.98G [00:07<00:20, 345MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  30% 3.00G/9.98G [00:07<00:21, 326MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  30% 3.04G/9.98G [00:09<02:02, 56.4MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  31% 3.10G/9.98G [00:09<01:20, 84.9MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  32% 3.16G/9.98G [00:09<00:59, 115MB/s] \u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  32% 3.20G/9.98G [00:10<00:48, 140MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  32% 3.24G/9.98G [00:10<00:40, 168MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  33% 3.29G/9.98G [00:10<00:35, 190MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  33% 3.33G/9.98G [00:10<00:33, 199MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  34% 3.39G/9.98G [00:10<00:26, 246MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  34% 3.44G/9.98G [00:10<00:22, 291MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  35% 3.49G/9.98G [00:10<00:19, 334MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  36% 3.54G/9.98G [00:10<00:17, 371MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  36% 3.60G/9.98G [00:11<00:16, 378MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  37% 3.65G/9.98G [00:11<00:16, 374MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  37% 3.70G/9.98G [00:11<00:15, 403MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  38% 3.75G/9.98G [00:11<00:15, 402MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  38% 3.81G/9.98G [00:11<00:17, 350MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  39% 3.85G/9.98G [00:11<00:18, 337MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  39% 3.89G/9.98G [00:11<00:17, 342MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  40% 3.94G/9.98G [00:12<00:16, 375MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  40% 3.98G/9.98G [00:12<00:15, 378MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  40% 4.04G/9.98G [00:12<00:15, 383MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  41% 4.08G/9.98G [00:12<00:15, 370MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  41% 4.12G/9.98G [00:12<00:17, 343MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  42% 4.16G/9.98G [00:12<00:16, 356MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  42% 4.20G/9.98G [00:12<00:16, 356MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  43% 4.25G/9.98G [00:12<00:16, 348MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  43% 4.29G/9.98G [00:12<00:16, 352MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  43% 4.33G/9.98G [00:13<00:16, 348MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  44% 4.37G/9.98G [00:13<00:15, 351MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  44% 4.41G/9.98G [00:13<00:16, 330MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  45% 4.46G/9.98G [00:13<00:16, 327MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  45% 4.51G/9.98G [00:13<00:14, 374MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  46% 4.55G/9.98G [00:13<00:15, 358MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  46% 4.60G/9.98G [00:13<00:13, 392MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  47% 4.65G/9.98G [00:13<00:13, 385MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  47% 4.69G/9.98G [00:14<00:14, 371MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  47% 4.73G/9.98G [00:14<00:15, 348MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  48% 4.77G/9.98G [00:14<00:14, 351MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  48% 4.81G/9.98G [00:14<00:15, 334MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  49% 4.85G/9.98G [00:14<00:15, 340MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  49% 4.90G/9.98G [00:14<00:18, 268MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  50% 4.95G/9.98G [00:14<00:16, 302MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  50% 4.99G/9.98G [00:15<00:15, 316MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  50% 5.03G/9.98G [00:15<00:15, 309MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  51% 5.08G/9.98G [00:15<00:15, 315MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  51% 5.12G/9.98G [00:15<00:14, 335MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  52% 5.16G/9.98G [00:15<00:13, 345MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  52% 5.21G/9.98G [00:15<00:13, 343MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  53% 5.25G/9.98G [00:15<00:13, 344MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  53% 5.31G/9.98G [00:15<00:12, 372MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  54% 5.35G/9.98G [00:16<00:12, 359MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  54% 5.39G/9.98G [00:16<00:13, 340MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  55% 5.44G/9.98G [00:16<00:12, 363MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  55% 5.48G/9.98G [00:16<00:12, 360MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  55% 5.53G/9.98G [00:16<00:12, 365MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  56% 5.57G/9.98G [00:16<00:12, 367MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  56% 5.61G/9.98G [00:16<00:11, 377MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  57% 5.65G/9.98G [00:16<00:11, 372MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  57% 5.70G/9.98G [00:17<00:10, 401MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  58% 5.76G/9.98G [00:17<00:10, 419MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  58% 5.81G/9.98G [00:17<00:10, 408MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  59% 5.85G/9.98G [00:17<00:10, 388MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  59% 5.90G/9.98G [00:17<00:09, 414MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  60% 5.96G/9.98G [00:17<00:09, 428MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  60% 6.01G/9.98G [00:17<00:09, 400MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  61% 6.06G/9.98G [00:17<00:09, 424MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  61% 6.11G/9.98G [00:17<00:08, 443MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  62% 6.17G/9.98G [00:18<00:08, 456MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  62% 6.22G/9.98G [00:18<00:08, 465MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  63% 6.27G/9.98G [00:18<00:11, 314MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  63% 6.31G/9.98G [00:18<00:13, 269MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  64% 6.35G/9.98G [00:18<00:14, 255MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  64% 6.39G/9.98G [00:19<00:14, 255MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  64% 6.42G/9.98G [00:19<00:14, 251MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  65% 6.45G/9.98G [00:19<00:14, 245MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  65% 6.48G/9.98G [00:19<00:14, 237MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  65% 6.51G/9.98G [00:19<00:14, 232MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  66% 6.54G/9.98G [00:19<00:14, 230MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  66% 6.57G/9.98G [00:19<00:15, 227MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  66% 6.61G/9.98G [00:19<00:14, 236MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  67% 6.64G/9.98G [00:20<00:13, 239MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  67% 6.67G/9.98G [00:20<00:13, 240MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  67% 6.70G/9.98G [00:20<00:13, 240MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  67% 6.73G/9.98G [00:20<00:18, 176MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  68% 6.77G/9.98G [00:20<00:14, 220MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  68% 6.81G/9.98G [00:20<00:14, 224MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  69% 6.84G/9.98G [00:21<00:15, 206MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  69% 6.87G/9.98G [00:21<00:16, 188MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  69% 6.89G/9.98G [00:21<00:17, 175MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  69% 6.91G/9.98G [00:21<00:19, 160MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  69% 6.93G/9.98G [00:21<00:21, 144MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  70% 6.95G/9.98G [00:22<00:56, 53.8MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  70% 7.00G/9.98G [00:22<00:31, 95.3MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  71% 7.04G/9.98G [00:23<00:25, 115MB/s] \u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  71% 7.07G/9.98G [00:23<00:21, 136MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  71% 7.10G/9.98G [00:23<00:17, 161MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  72% 7.14G/9.98G [00:23<00:13, 203MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  72% 7.18G/9.98G [00:23<00:11, 235MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  72% 7.22G/9.98G [00:23<00:10, 257MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  73% 7.27G/9.98G [00:23<00:09, 285MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  73% 7.31G/9.98G [00:23<00:08, 301MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  74% 7.35G/9.98G [00:24<00:08, 305MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  74% 7.39G/9.98G [00:24<00:08, 320MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  75% 7.43G/9.98G [00:24<00:07, 341MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  75% 7.49G/9.98G [00:24<00:06, 371MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  75% 7.53G/9.98G [00:24<00:06, 375MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  76% 7.58G/9.98G [00:24<00:06, 391MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  77% 7.63G/9.98G [00:24<00:05, 403MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  77% 7.68G/9.98G [00:24<00:06, 379MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  77% 7.72G/9.98G [00:25<00:05, 377MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  78% 7.76G/9.98G [00:25<00:05, 388MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  78% 7.81G/9.98G [00:25<00:05, 410MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  79% 7.86G/9.98G [00:25<00:05, 416MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  79% 7.91G/9.98G [00:25<00:04, 416MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  80% 7.95G/9.98G [00:25<00:05, 383MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  80% 8.00G/9.98G [00:25<00:04, 404MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  81% 8.05G/9.98G [00:25<00:04, 406MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  81% 8.10G/9.98G [00:25<00:04, 406MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  82% 8.15G/9.98G [00:26<00:04, 422MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  82% 8.20G/9.98G [00:26<00:04, 391MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  83% 8.24G/9.98G [00:26<00:04, 398MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  83% 8.29G/9.98G [00:26<00:04, 402MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  84% 8.35G/9.98G [00:26<00:03, 410MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  84% 8.40G/9.98G [00:26<00:03, 424MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  85% 8.45G/9.98G [00:26<00:03, 424MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  85% 8.50G/9.98G [00:26<00:03, 440MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  86% 8.56G/9.98G [00:27<00:03, 451MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  86% 8.61G/9.98G [00:27<00:03, 450MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  87% 8.66G/9.98G [00:27<00:02, 442MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  87% 8.71G/9.98G [00:27<00:03, 413MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  88% 8.76G/9.98G [00:27<00:03, 316MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  88% 8.80G/9.98G [00:27<00:03, 299MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  89% 8.84G/9.98G [00:27<00:04, 278MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  89% 8.87G/9.98G [00:28<00:04, 224MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  89% 8.91G/9.98G [00:28<00:04, 244MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  90% 8.94G/9.98G [00:28<00:04, 250MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  90% 8.98G/9.98G [00:28<00:04, 246MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  90% 9.01G/9.98G [00:28<00:04, 233MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  91% 9.04G/9.98G [00:28<00:04, 229MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  91% 9.07G/9.98G [00:29<00:04, 221MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  91% 9.10G/9.98G [00:29<00:04, 215MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  92% 9.13G/9.98G [00:29<00:04, 208MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  92% 9.16G/9.98G [00:29<00:04, 198MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  92% 9.19G/9.98G [00:29<00:04, 188MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  92% 9.21G/9.98G [00:29<00:04, 183MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  92% 9.23G/9.98G [00:29<00:04, 171MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  93% 9.25G/9.98G [00:30<00:04, 165MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  93% 9.27G/9.98G [00:30<00:04, 152MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  93% 9.29G/9.98G [00:30<00:04, 145MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  93% 9.31G/9.98G [00:30<00:05, 131MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  94% 9.33G/9.98G [00:30<00:05, 110MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  94% 9.36G/9.98G [00:31<00:04, 127MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  94% 9.40G/9.98G [00:31<00:03, 157MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  94% 9.43G/9.98G [00:31<00:03, 173MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  95% 9.45G/9.98G [00:31<00:03, 152MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  95% 9.47G/9.98G [00:31<00:03, 150MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  95% 9.49G/9.98G [00:31<00:03, 148MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  95% 9.51G/9.98G [00:31<00:03, 141MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  96% 9.54G/9.98G [00:32<00:02, 164MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  96% 9.58G/9.98G [00:32<00:01, 211MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  97% 9.64G/9.98G [00:32<00:01, 269MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  97% 9.69G/9.98G [00:32<00:00, 325MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  98% 9.74G/9.98G [00:32<00:00, 368MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  98% 9.79G/9.98G [00:32<00:00, 395MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  99% 9.85G/9.98G [00:32<00:00, 408MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin:  99% 9.90G/9.98G [00:32<00:00, 404MB/s]\u001b[A\n",
      "pytorch_model-00001-of-00002.bin: 100% 9.98G/9.98G [00:33<00:00, 301MB/s]\n",
      "Downloading shards:  50% 1/2 [00:33<00:33, 33.40s/it]\n",
      "pytorch_model-00002-of-00002.bin:   0% 0.00/3.54G [00:00<?, ?B/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:   1% 21.0M/3.54G [00:00<00:26, 134MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:   1% 52.4M/3.54G [00:00<00:17, 203MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:   3% 105M/3.54G [00:00<00:11, 299MB/s] \u001b[A\n",
      "pytorch_model-00002-of-00002.bin:   4% 157M/3.54G [00:00<00:09, 352MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:   6% 210M/3.54G [00:00<00:08, 386MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:   7% 262M/3.54G [00:00<00:08, 403MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:   9% 315M/3.54G [00:00<00:07, 415MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  10% 367M/3.54G [00:00<00:07, 429MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  12% 419M/3.54G [00:01<00:07, 411MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  13% 472M/3.54G [00:01<00:07, 422MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  15% 524M/3.54G [00:01<00:06, 435MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  16% 577M/3.54G [00:01<00:08, 362MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  17% 619M/3.54G [00:01<00:08, 345MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  19% 661M/3.54G [00:01<00:09, 309MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  20% 703M/3.54G [00:01<00:08, 322MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  21% 744M/3.54G [00:02<00:09, 296MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  22% 776M/3.54G [00:02<00:09, 287MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  23% 807M/3.54G [00:02<00:09, 287MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  24% 839M/3.54G [00:02<00:09, 282MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  25% 870M/3.54G [00:02<00:09, 287MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  25% 902M/3.54G [00:02<00:10, 244MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  26% 933M/3.54G [00:02<00:11, 227MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  27% 965M/3.54G [00:03<00:10, 237MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  28% 996M/3.54G [00:03<00:11, 216MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  29% 1.03G/3.54G [00:03<00:12, 206MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  30% 1.06G/3.54G [00:03<00:11, 209MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  31% 1.10G/3.54G [00:03<00:09, 252MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  32% 1.13G/3.54G [00:03<00:09, 264MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  33% 1.16G/3.54G [00:03<00:09, 257MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  34% 1.21G/3.54G [00:04<00:09, 256MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  35% 1.24G/3.54G [00:04<00:09, 245MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  36% 1.27G/3.54G [00:04<00:10, 222MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  37% 1.30G/3.54G [00:04<00:10, 208MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  38% 1.33G/3.54G [00:04<00:14, 152MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  38% 1.35G/3.54G [00:05<00:14, 149MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  39% 1.39G/3.54G [00:05<00:11, 192MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  40% 1.43G/3.54G [00:05<00:10, 199MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  41% 1.47G/3.54G [00:05<00:08, 235MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  43% 1.51G/3.54G [00:05<00:07, 260MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  44% 1.55G/3.54G [00:05<00:07, 272MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  45% 1.59G/3.54G [00:05<00:06, 280MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  46% 1.64G/3.54G [00:05<00:06, 300MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  47% 1.67G/3.54G [00:06<00:06, 293MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  48% 1.70G/3.54G [00:06<00:06, 263MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  49% 1.75G/3.54G [00:06<00:05, 312MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  51% 1.79G/3.54G [00:06<00:05, 336MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  52% 1.85G/3.54G [00:06<00:04, 368MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  54% 1.90G/3.54G [00:06<00:04, 385MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  55% 1.94G/3.54G [00:06<00:05, 314MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  56% 1.98G/3.54G [00:07<00:05, 299MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  57% 2.02G/3.54G [00:07<00:04, 309MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  58% 2.07G/3.54G [00:07<00:04, 309MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  59% 2.11G/3.54G [00:07<00:05, 274MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  60% 2.14G/3.54G [00:07<00:05, 263MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  62% 2.18G/3.54G [00:07<00:04, 287MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  63% 2.23G/3.54G [00:07<00:03, 327MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  64% 2.28G/3.54G [00:08<00:04, 301MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  65% 2.32G/3.54G [00:08<00:03, 311MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  67% 2.36G/3.54G [00:08<00:04, 269MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  68% 2.41G/3.54G [00:08<00:03, 309MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  70% 2.46G/3.54G [00:08<00:03, 346MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  71% 2.51G/3.54G [00:08<00:02, 359MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  72% 2.56G/3.54G [00:08<00:02, 386MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  73% 2.60G/3.54G [00:08<00:02, 384MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  75% 2.64G/3.54G [00:09<00:02, 308MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  76% 2.68G/3.54G [00:09<00:02, 293MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  77% 2.73G/3.54G [00:09<00:02, 321MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  78% 2.78G/3.54G [00:09<00:02, 359MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  80% 2.82G/3.54G [00:09<00:02, 286MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  81% 2.86G/3.54G [00:10<00:02, 229MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  82% 2.89G/3.54G [00:10<00:02, 236MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  83% 2.93G/3.54G [00:10<00:02, 221MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  83% 2.96G/3.54G [00:10<00:02, 209MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  84% 2.99G/3.54G [00:10<00:02, 226MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  86% 3.03G/3.54G [00:10<00:01, 266MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  87% 3.08G/3.54G [00:10<00:01, 305MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  88% 3.12G/3.54G [00:10<00:01, 313MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  89% 3.17G/3.54G [00:11<00:01, 338MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  91% 3.21G/3.54G [00:11<00:01, 327MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  92% 3.25G/3.54G [00:11<00:00, 307MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  93% 3.29G/3.54G [00:11<00:00, 308MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  94% 3.33G/3.54G [00:11<00:00, 326MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  95% 3.38G/3.54G [00:11<00:00, 322MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  97% 3.43G/3.54G [00:11<00:00, 356MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin:  98% 3.47G/3.54G [00:11<00:00, 320MB/s]\u001b[A\n",
      "pytorch_model-00002-of-00002.bin: 100% 3.54G/3.54G [00:12<00:00, 290MB/s]\n",
      "Downloading shards: 100% 2/2 [00:45<00:00, 22.94s/it]\n",
      "Loading checkpoint shards:   0% 0/2 [00:00<?, ?it/s]/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly.  To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
      "  return self.fget.__get__(instance, owner)()\n",
      "Loading checkpoint shards: 100% 2/2 [00:00<00:00,  3.74it/s]\n",
      "generation_config.json: 100% 124/124 [00:00<00:00, 741kB/s]\n",
      "Loading additional LLaVA weights...\n",
      "Loading LoRA weights...\n",
      "Merging LoRA weights...\n",
      "Model is loaded...\n",
      "preprocessor_config.json: 100% 316/316 [00:00<00:00, 1.79MB/s]\n",
      "pytorch_model.bin: 100% 1.71G/1.71G [00:06<00:00, 280MB/s]\n",
      "Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\n",
      "Non-default generation parameters: {'max_length': 4096}\n"
     ]
    }
   ],
   "source": [
    " #merge the LoRA weights with the full base model\n",
    "!python LLaVA/scripts/merge_lora_weights.py --model-path checkpoints/llava-v1.5-7b-task-lora --model-base liuhaotian/llava-v1.5-7b --save-model-path llava-ftmodel\n"
   ]
  },
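  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "With the LoRA weights merged into `llava-ftmodel`, the checkpoint can be loaded for inference. The cell below is a minimal sketch using LLaVA's `load_pretrained_model` helper; the path matches the `--save-model-path` above, but treat the exact loading arguments as version-dependent."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sketch: load the merged fine-tuned model for inference.\n",
    "# 'llava-ftmodel' is the --save-model-path used in the merge step above.\n",
    "from llava.model.builder import load_pretrained_model\n",
    "from llava.mm_utils import get_model_name_from_path\n",
    "\n",
    "model_path = 'llava-ftmodel'\n",
    "tokenizer, model, image_processor, context_len = load_pretrained_model(\n",
    "    model_path=model_path,\n",
    "    model_base=None,  # weights are already merged, so no base model is needed\n",
    "    model_name=get_model_name_from_path(model_path)\n",
    ")"
   ]
  },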
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2024-09-21 02:04:24,432] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
      "2024-09-21 02:04:25.771577: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
      "2024-09-21 02:04:25.792711: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
      "2024-09-21 02:04:25.799149: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
      "2024-09-21 02:04:27.025860: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
      "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1150: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
      "  warnings.warn(\n",
      "Loading checkpoint shards: 100% 3/3 [00:12<00:00,  4.18s/it]\n",
      "Some weights of the model checkpoint at llava-ftmodel were not used when initializing LlavaLlamaForCausalLM: ['model.vision_tower.vision_tower.vision_model.embeddings.class_embedding', 'model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight', 'model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight', 
... (remaining 'model.vision_tower.vision_tower.vision_model.*' parameter names, through encoder layer 23 plus post_layernorm and pre_layrnorm, omitted for brevity)']\n",
      "- This IS expected if you are initializing LlavaLlamaForCausalLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
      "- This IS NOT expected if you are initializing LlavaLlamaForCausalLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
      "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly.  To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
      "  return self.fget.__get__(instance, owner)()\n",
      "Human: Please describe the findings in the X-ray.\n",
      "Assistant: <s_bone></s_bone><s_heart>Heart size is normal.</s_heart><s_lung>Lungs are clear. No pneumothorax or pleural effusion.</s_lung><s_mediastinal>Mediastinal contours are normal.</s_mediastinal><s_others>The heart and lungs are normal.</s_others>\n",
      "Human: Traceback (most recent call last):\n",
      "\n",
      "^C\n"
     ]
    }
   ],
   "source": [
    "# Run inference on 1 test image\n",
    "'''\n",
    "!python -m llava.serve.cli \\\n",
    "  --model-path llava-ftmodel \\\n",
    "  --image-file \"/content/drive/My Drive/ML-Quiz-XRay-ReportGeneration/dataset_test/images/cf33da4a-49f3-4dd1-8e5b-038d2637751f.png\"\n",
    "'''"
   ]
  },
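  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The report generated above is wrapped in section tags (`<s_bone>`, `<s_heart>`, `<s_lung>`, `<s_mediastinal>`, `<s_others>`). The next cell is a minimal sketch, not part of the original pipeline, showing one way to split such a string into per-section fields; the tag names are taken from the sample output above and may need adjusting for other datasets."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "\n",
    "# Sketch: split a tagged report into a {section: text} dict.\n",
    "# The tag names follow the sample output printed above.\n",
    "def parse_report(text):\n",
    "    sections = {}\n",
    "    for tag, body in re.findall(r'<s_(\\w+)>(.*?)</s_\\w+>', text, flags=re.S):\n",
    "        sections[tag] = body.strip()\n",
    "    return sections\n",
    "\n",
    "sample = ('<s_heart>Heart size is normal.</s_heart>'\n",
    "          '<s_lung>Lungs are clear. No pneumothorax or pleural effusion.</s_lung>')\n",
    "parse_report(sample)  # -> {'heart': 'Heart size is normal.', 'lung': '...'}"
   ]
  },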
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluate the model on the test set\n",
    "!python -m llava.eval.model_vqa_science \\\n",
    "    --model-path llava-ftmodel \\\n",
    "    --question-file ./dataset_test/test/test_dataset.json \\\n",
    "    --image-folder ./dataset_test/images \\\n",
    "    --answers-file ./dataset_test/answers/llava-v1ft.5-7b.jsonl \\\n",
    "    --single-pred-prompt \\\n",
    "    --temperature 0 \\\n",
    "    --conv-mode vicuna_v1"
   ]
  },
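  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick sanity check on the generated answers file. The `question_id` and `text` field names below are assumptions based on the JSONL that LLaVA's eval scripts typically emit; verify them against the actual file before relying on this."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "# Sketch: load the model's answers (one JSON object per line) and peek at one.\n",
    "# Field names ('question_id', 'text') are assumed from LLaVA's eval output.\n",
    "answers = []\n",
    "with open('./dataset_test/answers/llava-v1ft.5-7b.jsonl') as f:\n",
    "    for line in f:\n",
    "        answers.append(json.loads(line))\n",
    "\n",
    "print(len(answers), 'answers generated')\n",
    "print(answers[0].get('question_id'), '->', answers[0].get('text', '')[:200])"
   ]
  },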
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluate the model on the val set\n",
    "!python -m llava.eval.model_vqa_science \\\n",
    "    --model-path llava-ftmodel \\\n",
    "    --question-file ./dataset_val/val/val_dataset.json \\\n",
    "    --image-folder ./dataset_val/images \\\n",
    "    --answers-file ./dataset_val/answers/llava-v1ft.5-7b.jsonl \\\n",
    "    --single-pred-prompt \\\n",
    "    --temperature 0 \\\n",
    "    --conv-mode vicuna_v1"
   ]
  }
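  ,
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a rough quantitative check, the sketch below computes a whitespace-token F1 between each generated report and its reference. It assumes `test_dataset.json` is in the LLaVA conversation format (the reference report being the `gpt` turn) and that the answers JSONL lines up with it by order; neither assumption is guaranteed by the eval script, so verify both before trusting the numbers."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "# Sketch: mean whitespace-token F1 over the test set.\n",
    "# Assumes references are the 'gpt' turns in test_dataset.json and that\n",
    "# predictions in the answers JSONL appear in the same order.\n",
    "def token_f1(pred, ref):\n",
    "    p, r = pred.split(), ref.split()\n",
    "    common = sum(min(p.count(t), r.count(t)) for t in set(p))\n",
    "    if common == 0:\n",
    "        return 0.0\n",
    "    prec, rec = common / len(p), common / len(r)\n",
    "    return 2 * prec * rec / (prec + rec)\n",
    "\n",
    "with open('./dataset_test/test/test_dataset.json') as f:\n",
    "    refs = json.load(f)\n",
    "with open('./dataset_test/answers/llava-v1ft.5-7b.jsonl') as f:\n",
    "    preds = [json.loads(line) for line in f]\n",
    "\n",
    "scores = []\n",
    "for ref, pred in zip(refs, preds):\n",
    "    gt = next(t['value'] for t in ref['conversations'] if t['from'] == 'gpt')\n",
    "    scores.append(token_f1(pred.get('text', ''), gt))\n",
    "print('mean token F1:', sum(scores) / max(len(scores), 1))"
   ]
  }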
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}