Switch to unified view

a b/Roberta+LLM/compare_two_prompts.ipynb
1
{
2
  "cells": [
3
    {
4
      "cell_type": "markdown",
5
      "metadata": {
6
        "id": "view-in-github",
7
        "colab_type": "text"
8
      },
9
      "source": [
10
        "<a href=\"https://colab.research.google.com/github/jlopetegui98/NER-ClinicalTrials-Elegibility-Criteria/blob/main/Roberta%2BLLM/compare_two_prompts.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
11
      ]
12
    },
13
    {
14
      "cell_type": "code",
15
      "execution_count": 1,
16
      "metadata": {
17
        "colab": {
18
          "base_uri": "https://localhost:8080/"
19
        },
20
        "id": "QO5obdMWWJJw",
21
        "outputId": "c0b8b68d-1af6-4308-8ea1-873bd428ecd5"
22
      },
23
      "outputs": [
24
        {
25
          "output_type": "stream",
26
          "name": "stdout",
27
          "text": [
28
            "Mounted at /content/drive\n"
29
          ]
30
        }
31
      ],
32
      "source": [
33
        "# Colab only: mount Google Drive (comment this cell out when running locally)\n",
34
        "from google.colab import drive\n",
35
        "drive.mount('/content/drive')"
36
      ]
37
    },
38
    {
39
      "cell_type": "code",
40
      "execution_count": 2,
41
      "metadata": {
42
        "colab": {
43
          "base_uri": "https://localhost:8080/"
44
        },
45
        "id": "uXHZTGK9WJJx",
46
        "outputId": "c8a0372f-8774-40df-d275-03e90c2c8848"
47
      },
48
      "outputs": [
49
        {
50
          "output_type": "stream",
51
          "name": "stdout",
52
          "text": [
53
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m510.5/510.5 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
54
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
55
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
56
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
57
            "\u001b[?25hCollecting seqeval\n",
58
            "  Downloading seqeval-1.2.2.tar.gz (43 kB)\n",
59
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.6/43.6 kB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
60
            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
61
            "Requirement already satisfied: numpy>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.25.2)\n",
62
            "Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.2.2)\n",
63
            "Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.11.4)\n",
64
            "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.4.0)\n",
65
            "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (3.4.0)\n",
66
            "Building wheels for collected packages: seqeval\n",
67
            "  Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
68
            "  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16161 sha256=3d05e378a95e6360b53d3fd878ed43d9796294678465f3f139c5e30bef6ab718\n",
69
            "  Stored in directory: /root/.cache/pip/wheels/1a/67/4a/ad4082dd7dfc30f2abfe4d80a2ed5926a506eb8a972b4767fa\n",
70
            "Successfully built seqeval\n",
71
            "Installing collected packages: seqeval\n",
72
            "Successfully installed seqeval-1.2.2\n",
73
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.1/84.1 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
74
            "\u001b[?25h  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
75
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
76
            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
77
            "  Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
78
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m119.8/119.8 MB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
79
            "\u001b[?25h  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
80
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
81
            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
82
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m297.4/297.4 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
83
            "\u001b[?25h  Building wheel for peft (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
84
            "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
85
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
86
            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
87
            "  Building wheel for accelerate (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"
88
          ]
89
        }
90
      ],
91
      "source": [
92
        "# Colab only: install the dependencies (comment this cell out when running locally)\n",
93
        "!pip install -q -U datasets\n",
94
        "!pip install seqeval\n",
95
        "!pip install -q -U evaluate\n",
96
        "!pip install -q -U git+https://github.com/huggingface/transformers.git\n",
97
        "!pip install -q -U bitsandbytes\n",
98
        "# !pip install -i https://pypi.org/simple/ bitsandbytes\n",
99
        "!pip install -q -U git+https://github.com/huggingface/peft.git\n",
100
        "!pip install -q -U git+https://github.com/huggingface/accelerate.git"
101
      ]
102
    },
103
    {
104
      "cell_type": "code",
105
      "execution_count": 1,
106
      "metadata": {
107
        "id": "iwRXECp_WJJx"
108
      },
109
      "outputs": [],
110
      "source": [
111
        "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextGenerationPipeline\n",
112
        "import torch\n",
113
        "import accelerate\n",
114
        "import os\n",
115
        "from utils import *"
116
      ]
117
    },
118
    {
119
      "cell_type": "code",
120
      "source": [
121
        "import pandas as pd\n",
122
        "from datasets import Dataset, DatasetDict"
123
      ],
124
      "metadata": {
125
        "id": "-De8g6lgpWtZ"
126
      },
127
      "execution_count": 2,
128
      "outputs": []
129
    },
130
    {
131
      "cell_type": "code",
132
      "source": [
133
        "from datasets import load_dataset, load_metric"
134
      ],
135
      "metadata": {
136
        "id": "e7Bpoz11bRe3"
137
      },
138
      "execution_count": 3,
139
      "outputs": []
140
    },
141
    {
142
      "cell_type": "code",
143
      "execution_count": 4,
144
      "metadata": {
145
        "colab": {
146
          "base_uri": "https://localhost:8080/",
147
          "height": 159,
148
          "referenced_widgets": [
149
            "0f4e594a6aa64dd697fb841d4207d4b7",
150
            "413c1cd08da84e32ba2de36d891bf86a",
151
            "ba7f75db7de145abb174658862ef50a3",
152
            "d55f2bd66fe044e4816348c1b4b032bf",
153
            "d1fd6600f76a4bf7968b0d13de093148",
154
            "b00aef23e75445c785a6b3b8756b9c94",
155
            "1fa716b65c7f47989184cc24bd56e5bd",
156
            "494b9dacddb948f7b08b509e7a79f3f1",
157
            "e3e2f93363b74e24bf22abcaf021b3dc",
158
            "b607f0954b3844e18ed2a887372d42c5",
159
            "95176ba773274023b4e3356bb3cb4cc9",
160
            "ac4c65187e864fd09e296fcbbe3ad6d8",
161
            "540f2e543376445d849e1a56ac38e4d4",
162
            "a9d9e19fa5ae41109e7c7cf9a4d8a13c",
163
            "4fd000047a0a402194cb3b99bb59d8f9",
164
            "9d08585ce6fd40538dc10d1c5d1aee70",
165
            "f4642fc990e94bf99c7d26017d8771ba",
166
            "9c49b620b2c04b83ae01bf4ab06e7270",
167
            "d47d0c2d528c40a38965fac763e450cf",
168
            "374d4f7c917d48c2b81f24cbfcfce062",
169
            "9f151f163b2c4966b9460dd31e184a84",
170
            "67a9e41b56a741baa4a953905d11c263",
171
            "e32ad767760d4bddb2f3a0a38327339b",
172
            "681dac30b90b4b02b38f9e5bb99429c0",
173
            "dd0df631524e42e48299283f1a9555e9",
174
            "609d5b49e085499994a11342e8dd425f",
175
            "3e7792c5d2484e24950f1f83f899ba4a",
176
            "8fae6bd7ebc54636a14b375fa46ec0dc",
177
            "7fa56a404f8847239109441a3b043a77",
178
            "d34e4794cb944c1cb8ae88845cad54fb",
179
            "4856843799e242ac8b72cf37ad1a159f",
180
            "c3c6d1e8aeb14e999ea3fadc10539861"
181
          ]
182
        },
183
        "id": "SMrGGcyPWJJy",
184
        "outputId": "1988c833-06f1-44c2-c832-525b2dabee55"
185
      },
186
      "outputs": [
187
        {
188
          "output_type": "display_data",
189
          "data": {
190
            "text/plain": [
191
              "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
192
            ],
193
            "application/vnd.jupyter.widget-view+json": {
194
              "version_major": 2,
195
              "version_minor": 0,
196
              "model_id": "0f4e594a6aa64dd697fb841d4207d4b7"
197
            }
198
          },
199
          "metadata": {}
200
        }
201
      ],
202
      "source": [
203
        "from huggingface_hub import notebook_login\n",
204
        "\n",
205
        "notebook_login()"
206
      ]
207
    },
208
    {
209
      "cell_type": "code",
210
      "execution_count": 5,
211
      "metadata": {
212
        "id": "eOQPAyi5WJJy"
213
      },
214
      "outputs": [],
215
      "source": [
216
        "# Entity type names (simple_ent) and the BIO tag -> integer id mapping (sel_ent)\n",
217
        "simple_ent = {\"Condition\", \"Value\", \"Drug\", \"Procedure\", \"Measurement\", \"Temporal\", \"Observation\", \"Person\", \"Device\"}\n",
218
        "sel_ent = {\n",
219
        "    \"O\": 0,\n",
220
        "    \"B-Condition\": 1,\n",
221
        "    \"I-Condition\": 2,\n",
222
        "    \"B-Value\": 3,\n",
223
        "    \"I-Value\": 4,\n",
224
        "    \"B-Drug\": 5,\n",
225
        "    \"I-Drug\": 6,\n",
226
        "    \"B-Procedure\": 7,\n",
227
        "    \"I-Procedure\": 8,\n",
228
        "    \"B-Measurement\": 9,\n",
229
        "    \"I-Measurement\": 10,\n",
230
        "    \"B-Temporal\": 11,\n",
231
        "    \"I-Temporal\": 12,\n",
232
        "    \"B-Observation\": 13,\n",
233
        "    \"I-Observation\": 14,\n",
234
        "    \"B-Person\": 15,\n",
235
        "    \"I-Person\": 16,\n",
236
        "    \"B-Device\": 17,\n",
237
        "    \"I-Device\": 18\n",
238
        "}\n",
239
        "\n",
240
        "entities_list = list(sel_ent.keys())\n",
241
        "sel_ent_inv = {v: k for k, v in sel_ent.items()}"
242
      ]
243
    },
244
    {
245
      "cell_type": "code",
246
      "execution_count": 6,
247
      "metadata": {
248
        "id": "5mCzQOK7WJJy"
249
      },
250
      "outputs": [],
251
      "source": [
252
        "# Project root inside the mounted Drive folder; set root = '..' instead for a local checkout\n",
253
        "root = './drive/MyDrive/TER-LISN-2024'\n",
254
        "data_path = f'{root}/data'\n",
255
        "models_path = f'{root}/models'"
256
      ]
257
    },
258
    {
259
      "cell_type": "code",
260
      "execution_count": 7,
261
      "metadata": {
262
        "id": "9WoScuG9WJJy"
263
      },
264
      "outputs": [],
265
      "source": [
266
        "model_name = \"mistralai/Mistral-7B-v0.1\""
267
      ]
268
    },
269
    {
270
      "cell_type": "code",
271
      "execution_count": 8,
272
      "metadata": {
273
        "colab": {
274
          "base_uri": "https://localhost:8080/",
275
          "height": 173,
276
          "referenced_widgets": [
277
            "e1b0ea4e59bd4bebb31544a4ca43fae6",
278
            "bb7c0220e2404676a2a37b5f8e45a560",
279
            "d9a38641722d4d1698c9eb12c9ba2aa2",
280
            "726b3712020e475e8e5734e51ad3255b",
281
            "b834f5ce5d4b4e8eac3688c2119e0e6e",
282
            "99fea94eaf344a1687764756225e9a88",
283
            "640023c221074c73a38d147a05cfedaf",
284
            "e2a1e70aa375450299d81baa7a007f1f",
285
            "4f06ce02f7b8452981731c87ac3814bc",
286
            "ed7294f2acf84015ac0c0c3b78c629cb",
287
            "1cf1101362c545738dcbb5e528f1af33"
288
          ]
289
        },
290
        "id": "-R1Ht6AqWJJy",
291
        "outputId": "9b883de3-f658-4402-c902-cfd946986427"
292
      },
293
      "outputs": [
294
        {
295
          "output_type": "stream",
296
          "name": "stderr",
297
          "text": [
298
            "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
299
            "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
300
            "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
301
            "You will be able to reuse this secret in all of your notebooks.\n",
302
            "Please note that authentication is recommended but still optional to access public models or datasets.\n",
303
            "  warnings.warn(\n"
304
          ]
305
        },
306
        {
307
          "output_type": "display_data",
308
          "data": {
309
            "text/plain": [
310
              "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
311
            ],
312
            "application/vnd.jupyter.widget-view+json": {
313
              "version_major": 2,
314
              "version_minor": 0,
315
              "model_id": "e1b0ea4e59bd4bebb31544a4ca43fae6"
316
            }
317
          },
318
          "metadata": {}
319
        }
320
      ],
321
      "source": [
322
        "# Load the base model (Mistral 7B) with 4-bit NF4 quantization\n",
323
        "bnb_config = BitsAndBytesConfig(\n",
324
        "    load_in_4bit= True,\n",
325
        "    bnb_4bit_quant_type= \"nf4\",\n",
326
        "    bnb_4bit_compute_dtype= torch.bfloat16,\n",
327
        "    bnb_4bit_use_double_quant= False,\n",
328
        ")\n",
329
        "model = AutoModelForCausalLM.from_pretrained(\n",
330
        "   model_name,\n",
331
        "    quantization_config=bnb_config,\n",
332
        "    device_map={\"\": 0}\n",
333
        ")"
334
      ]
335
    },
336
    {
337
      "cell_type": "code",
338
      "execution_count": 9,
339
      "metadata": {
340
        "colab": {
341
          "base_uri": "https://localhost:8080/"
342
        },
343
        "id": "zjt27HXtWJJz",
344
        "outputId": "0df3c4ce-03eb-4f64-9546-b790d5fdd3a8"
345
      },
346
      "outputs": [
347
        {
348
          "output_type": "execute_result",
349
          "data": {
350
            "text/plain": [
351
              "(True, True)"
352
            ]
353
          },
354
          "metadata": {},
355
          "execution_count": 9
356
        }
357
      ],
358
      "source": [
359
        "# import tokenizer for mistral-7B\n",
360
        "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n",
361
        "tokenizer.padding_side = 'left'\n",
362
        "tokenizer.pad_token = tokenizer.eos_token\n",
363
        "tokenizer.add_eos_token = True\n",
364
        "tokenizer.add_bos_token, tokenizer.add_eos_token"
365
      ]
366
    },
367
    {
368
      "cell_type": "code",
369
      "execution_count": 10,
370
      "metadata": {
371
        "id": "_CvK-Os_WJJz"
372
      },
373
      "outputs": [],
374
      "source": [
375
        "pipe  = TextGenerationPipeline(model = model, tokenizer = tokenizer)"
376
      ]
377
    },
378
    {
379
      "cell_type": "code",
380
      "source": [],
381
      "metadata": {
382
        "id": "ooEQwK2tSDat"
383
      },
384
      "execution_count": null,
385
      "outputs": []
386
    },
387
    {
388
      "cell_type": "code",
389
      "execution_count": 11,
390
      "metadata": {
391
        "id": "DjRonD-qWJJz"
392
      },
393
      "outputs": [],
394
      "source": [
395
        "dataset = load_dataset('JavierLopetegui/chia_v1')"
396
      ]
397
    },
398
    {
399
      "cell_type": "code",
400
      "execution_count": 12,
401
      "metadata": {
402
        "id": "4OY0LfqXWJJz"
403
      },
404
      "outputs": [],
405
      "source": [
406
        "# for each sentence save the text\n",
407
        "def generate_sentences_from_tokens(sentences):\n",
408
        "    texts_sentences = []\n",
409
        "    sentences_tokens = sentences['tokens']\n",
410
        "    for sentence in sentences_tokens:\n",
411
        "        sent_text = \" \".join(sentence)\n",
412
        "        texts_sentences.append(sent_text)\n",
413
        "    sentences['text'] = texts_sentences\n",
414
        "    return sentences"
415
      ]
416
    },
417
    {
418
      "cell_type": "code",
419
      "execution_count": 13,
420
      "metadata": {
421
        "id": "3RB2Ao-7WJJz"
422
      },
423
      "outputs": [],
424
      "source": [
425
        "def build_prompts(sentences, prompt_type=2):\n",
426
        "    sentences_prompts = []\n",
427
        "    for sent in sentences['text']:\n",
428
        "        prompt = build_prompt(sent, prompt_type)\n",
429
        "        sentences_prompts.append(prompt)\n",
430
        "    sentences['prompt'] = sentences_prompts\n",
431
        "    return sentences"
432
      ]
433
    },
434
    {
435
      "cell_type": "code",
436
      "execution_count": 14,
437
      "metadata": {
438
        "id": "brYkkOKPWJJ0"
439
      },
440
      "outputs": [],
441
      "source": [
442
        "dataset = dataset.map(generate_sentences_from_tokens, batched=True)\n",
443
        "dataset_prompt1 = dataset.map(lambda batch: build_prompts(batch, prompt_type=1), batched=True)\n",
444
        "dataset_prompt2 = dataset.map(lambda batch: build_prompts(batch, prompt_type=2), batched=True)"
445
      ]
446
    },
447
    {
448
      "cell_type": "code",
449
      "execution_count": 15,
450
      "metadata": {
451
        "id": "-DsYscp_WJJ0"
452
      },
453
      "outputs": [],
454
      "source": [
455
        "test_dataset_p1 = dataset_prompt1['test']\n",
456
        "test_dataset_p2 = dataset_prompt2['test']"
457
      ]
458
    },
459
    {
460
      "cell_type": "code",
461
      "source": [
462
        "test_dataset_p1['prompt'][0]"
463
      ],
464
      "metadata": {
465
        "colab": {
466
          "base_uri": "https://localhost:8080/",
467
          "height": 209
468
        },
469
        "id": "brQSqtjxbbJ8",
470
        "outputId": "2da15406-ca32-490f-8349-c5f0b78fb1b9"
471
      },
472
      "execution_count": 16,
473
      "outputs": [
474
        {
475
          "output_type": "execute_result",
476
          "data": {
477
            "text/plain": [
478
              "'I need to perform a named entity recognition task on a  text related with inclusion criteria in clinical trials.\\n    The entities you need to recognize are: Condition, Value, Drug, Procedure, Measurement, Temporal, Observation, Person, Mood, Device and Pregnancy_considerations.\\n    Particularly you have to produce the ouput in the BIO format. I will show you an example of the expected output.\\n    Input text: Patients with symptomatic CNS metastases or leptomeningeal involvement\\n    Output:\\n    Patients O\\n    with O\\n    symptomatic O\\n    CNS B-Condition\\n    metastases I-Condition\\n    or O\\n    leptomeningeal B-Condition\\n    involvement I-Condition\\n\\n    You can see that tokens without any entity are labeled as O, and the tokens that are part of an entity are labeled as B-<entity> or I-<entity> depending on if they are the beginning or the inside of the entity.\\n    Please, just answer the question for this specific example and stop writting after that.\\n    Input text: self - reported healthy adults between the ages of 18 - 60 who are fluent in English .'"
479
            ],
480
            "application/vnd.google.colaboratory.intrinsic+json": {
481
              "type": "string"
482
            }
483
          },
484
          "metadata": {},
485
          "execution_count": 16
486
        }
487
      ]
488
    },
489
    {
490
      "cell_type": "code",
491
      "source": [
492
        "test_dataset_p2['prompt'][0]"
493
      ],
494
      "metadata": {
495
        "colab": {
496
          "base_uri": "https://localhost:8080/",
497
          "height": 209
498
        },
499
        "id": "tJzeOg9vbh3j",
500
        "outputId": "aaeb49fb-3d83-4832-abda-034df0f7aede"
501
      },
502
      "execution_count": 17,
503
      "outputs": [
504
        {
505
          "output_type": "execute_result",
506
          "data": {
507
            "text/plain": [
508
              "'I am working on a named entity recognition problem, in the context of clinical\\n    trials eligibility criteria. I will show you the list of entities:\\n    - Condition\\n    - Value\\n    - Drug\\n    - Procedure\\n    - Measurement\\n    - Temporal\\n    - Observation\\n    - Person\\n    - Mood\\n    - Device\\n\\n    Your task consists in annotate the named entities in a given sentence in the format I will explain you.\\n    I will explain you with some examples:\\n\\n    Example 1:\\n    Input: Patients who have received prior chemotherapy for unresectable disease.\\n    Output: Patients who have received prior <Procedure>chemotherapy</Procedure> for <Condition>unresectable disease</Condition>.\\n\\n    Example 2:\\n    Input: Patients with any other severe concurrent disease, which in the judgment of the investigator, would make the patient inappropriate for entry into this study.\\n    Ouput: Patients with any other severe <Condition>concurrent disease</Condition>, which in the judgment of the investigator, would make the patient <Mood>inappropriate for <Observation>entry into this study</Observation>.\\n\\n    As you can see, in each example, the extracted entities are enclosed using the sintax: <ENT>text of the entity</ENT>.\\n\\n    Please now annotate as explained before the following sentence:\\n\\n    Input: self - reported healthy adults between the ages of 18 - 60 who are fluent in English .'"
509
            ],
510
            "application/vnd.google.colaboratory.intrinsic+json": {
511
              "type": "string"
512
            }
513
          },
514
          "metadata": {},
515
          "execution_count": 17
516
        }
517
      ]
518
    },
519
    {
520
      "cell_type": "code",
521
      "execution_count": null,
522
      "metadata": {
523
        "id": "ANrMTM9xWJJ0"
524
      },
525
      "outputs": [],
526
      "source": [
527
        "# # keep just the prompt column\n",
528
        "# test_dataset_p1 = test_dataset_p1.remove_columns(['tokens', 'text', 'ner_tags', 'file'])\n",
529
        "# test_dataset_p2 = test_dataset_p2.remove_columns(['tokens', 'text', 'ner_tags', 'file'])"
530
      ]
531
    },
532
    {
533
      "cell_type": "code",
534
      "source": [],
535
      "metadata": {
536
        "id": "fTKRysEQWaSM"
537
      },
538
      "execution_count": null,
539
      "outputs": []
540
    },
541
    {
542
      "cell_type": "code",
543
      "execution_count": null,
544
      "metadata": {
545
        "id": "W4SlSxy7WJJ0"
546
      },
547
      "outputs": [],
548
      "source": [
549
        "# data_loader_p1 = torch.utils.data.DataLoader(test_dataset_p1, batch_size=4, shuffle=False)\n",
550
        "# data_loader_p2 = torch.utils.data.DataLoader(test_dataset_p2, batch_size=4, shuffle=False)"
551
      ]
552
    },
553
    {
554
      "cell_type": "code",
555
      "execution_count": null,
556
      "metadata": {
557
        "id": "Kgd_0VL8WJJ0"
558
      },
559
      "outputs": [],
560
      "source": [
561
        "# generated_sentences_p1 = []\n",
562
        "# for batch in data_loader_p1:\n",
563
        "#     generated_sentences_p1.extend(pipe(batch['prompt'], max_new_tokens = 500, return_full_text = False, handle_long_generation = \"hole\"))"
564
      ]
565
    },
566
    {
567
      "cell_type": "code",
568
      "execution_count": null,
569
      "metadata": {
570
        "id": "nX5E-USMWJJ0"
571
      },
572
      "outputs": [],
573
      "source": [
574
        "# generated_sentences_p2 = []\n",
575
        "# for batch in data_loader_p2:\n",
576
        "#     generated_sentences_p2.extend(pipe(batch['prompt'], max_new_tokens = 500, return_full_text = False, handle_long_generation = \"hole\"))"
577
      ]
578
    },
579
    {
580
      "cell_type": "code",
581
      "source": [
582
        "from tqdm import tqdm"
583
      ],
584
      "metadata": {
585
        "id": "0JaNvN8Ec1Ko"
586
      },
587
      "execution_count": 21,
588
      "outputs": []
589
    },
590
    {
591
      "cell_type": "code",
592
      "execution_count": 31,
593
      "metadata": {
594
        "colab": {
595
          "base_uri": "https://localhost:8080/"
596
        },
597
        "id": "yGBo9jS8WJJ0",
598
        "outputId": "cc2c9e02-f75c-40ac-a262-7be58e3e09d0"
599
      },
600
      "outputs": [
601
        {
602
          "output_type": "stream",
603
          "name": "stderr",
604
          "text": [
605
            " 12%|█▏        | 6/50 [04:11<29:42, 40.51s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
606
            "100%|██████████| 50/50 [33:08<00:00, 39.77s/it]\n"
607
          ]
608
        }
609
      ],
610
      "source": [
611
        "# Prompt 1: query the model one prompt at a time over the first 50 test prompts\n",
612
        "generated_sentences_p1 = []\n",
613
        "for sentence in tqdm(test_dataset_p1['prompt'][:50]):\n",
614
        "    generations = pipe(sentence, max_new_tokens=500, return_full_text=False, handle_long_generation=\"hole\")\n",
615
        "    output = generations[0]['generated_text'].partition('\\n\\n')[0]  # keep only the text before the first blank line\n",
616
        "    generated_sentences_p1.append(output)"
617
      ]
618
    },
619
    {
620
      "cell_type": "code",
621
      "source": [
622
        "from tqdm import tqdm"
623
      ],
624
      "metadata": {
625
        "id": "rQ23uzq6A8NW"
626
      },
627
      "execution_count": 26,
628
      "outputs": []
629
    },
630
    {
631
      "cell_type": "code",
632
      "execution_count": 28,
633
      "metadata": {
634
        "colab": {
635
          "base_uri": "https://localhost:8080/"
636
        },
637
        "id": "fASHzsJCWJJ0",
638
        "outputId": "eb43ea5f-1eed-4794-945b-68fe63fd72a2"
639
      },
640
      "outputs": [
641
        {
642
          "output_type": "stream",
643
          "name": "stderr",
644
          "text": [
645
            " 14%|█▍        | 7/50 [04:47<29:07, 40.64s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
646
            "100%|██████████| 50/50 [34:05<00:00, 40.91s/it]\n"
647
          ]
648
        }
649
      ],
650
      "source": [
651
        "generated_sentences_p2 = []\n",
652
        "for sentence in tqdm(test_dataset_p2['prompt'][:50]):\n",
653
        "    sentence = sentence + '\\noutput: '  # cue the model to start its answer right after the prompt\n",
654
        "    generations = pipe(sentence, max_new_tokens=500, return_full_text=False, handle_long_generation=\"hole\")\n",
655
        "    output = generations[0]['generated_text'].partition('\\n')[0]  # keep only the first generated line\n",
656
        "    generated_sentences_p2.append(output)"
657
      ]
658
    },
659
    {
660
      "cell_type": "code",
661
      "source": [
662
        "generated_sentences_p2[0]"
663
      ],
664
      "metadata": {
665
        "colab": {
666
          "base_uri": "https://localhost:8080/",
667
          "height": 52
668
        },
669
        "id": "DoGkaFUrKux6",
670
        "outputId": "c42a73f4-0e9c-40a1-9137-cf4adc1f937c"
671
      },
672
      "execution_count": 29,
673
      "outputs": [
674
        {
675
          "output_type": "execute_result",
676
          "data": {
677
            "text/plain": [
678
              "' self - reported healthy adults between the ages of <Measurement>18 - 60</Measurement> who are fluent in <Language>English</Language>.'"
679
            ],
680
            "application/vnd.google.colaboratory.intrinsic+json": {
681
              "type": "string"
682
            }
683
          },
684
          "metadata": {},
685
          "execution_count": 29
686
        }
687
      ]
688
    },
689
    {
690
      "cell_type": "code",
691
      "execution_count": 30,
692
      "metadata": {
693
        "colab": {
694
          "base_uri": "https://localhost:8080/",
695
          "height": 145,
696
          "referenced_widgets": [
697
            "c8846cf2991a4e93a0e91bc1802f1079",
698
            "3b12538b8e254b4ebcc79d694b2b272c",
699
            "205f861cb2ed48209b795217f645e077",
700
            "25605c2b3e93464daedc1fcade1f47b4",
701
            "98919ba0701545fd997e3d71f4eff69e",
702
            "013efa5f201d460e9332bd047982c449",
703
            "7a0323004bd14c579c27df13bf3f861b",
704
            "ef49e0a4613f4207b11012c494b4ed15",
705
            "f88c80704c59405f9224c9a98105303d",
706
            "1a4eab919e9e475ea452687215355489",
707
            "cf5d7616d08944108ae6dcfdb487cd5a",
708
            "ae392574043b40c8bf461468e8f06f18",
709
            "8af0715bcf2647c390c073430ffbbe5d",
710
            "44e3af9f1124467b94cbb6f90f1669a3",
711
            "804dbc29e4a9404d935b8ab10dc481e9",
712
            "6ed75fda4c3f4fb2ab931ee7e162b3c4",
713
            "d71d92ee94994f93bbbdedb475dc5ba5",
714
            "1780c6cc045a49a6838bc1e43c89dca1",
715
            "51b7d216e4144dd0a5ea92eb12e410b7",
716
            "6e2bf10b566a453d8b342e824ec36fdf",
717
            "1a7b5758d56d4811902698d1ad696163",
718
            "11047f16bb534e70ad1d6bdde663cfa4",
719
            "3791512b6e92422799da4e950c6dde6d",
720
            "161970c8736a46899cd017c5e5906352",
721
            "cfafc68041754603b0916c8e28a90d0b",
722
            "a91cbbefe5ca4c07a46ad81424e2f0a7",
723
            "995f169324d647fbb1c09c7ea3ec22d6",
724
            "c617e79d6b4448f1af28c2d3c46d4ee5",
725
            "1d23076205f3470590110feb368afd52",
726
            "99a5dcb8b54c4e1da8747d16e1d54538",
727
            "a890ce1e992a4f04a524d3c76927d5f6",
728
            "4e12dbb6c203422193cf12d58338f1c3",
729
            "ee8308b8ca9a4dd8b9993824f2df176c",
730
            "3786b6a492114f58b8c92f904c80b9e2",
731
            "d6d29174233e4d1596dd391b9a22eac2",
732
            "fc135f2fe68d4b83ad57ecb3085febe1",
733
            "f539dc11a57e425ea714a681d5027960",
734
            "af7fa5b783c54fe4b62e53cd6641de2b",
735
            "f0e324c229f7477baf1bb26fa4bc85ed",
736
            "205312c7b62b4410a5aa1298deb1fb6f",
737
            "4847afa0d5044cb0a0d693495ff05c63",
738
            "0a7d5f7c3174491990f4f1018cbafe4f",
739
            "b01bb888c2a646c0af368da52b9b4f2f",
740
            "bf557e4621f04d56a736fda56470d77e"
741
          ]
742
        },
743
        "id": "3L_-8fahWJJ0",
744
        "outputId": "4366e679-185a-490a-8b61-2677a32c83c5"
745
      },
746
      "outputs": [
747
        {
748
          "output_type": "display_data",
749
          "data": {
750
            "text/plain": [
751
              "tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]"
752
            ],
753
            "application/vnd.jupyter.widget-view+json": {
754
              "version_major": 2,
755
              "version_minor": 0,
756
              "model_id": "c8846cf2991a4e93a0e91bc1802f1079"
757
            }
758
          },
759
          "metadata": {}
760
        },
761
        {
762
          "output_type": "display_data",
763
          "data": {
764
            "text/plain": [
765
              "config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]"
766
            ],
767
            "application/vnd.jupyter.widget-view+json": {
768
              "version_major": 2,
769
              "version_minor": 0,
770
              "model_id": "ae392574043b40c8bf461468e8f06f18"
771
            }
772
          },
773
          "metadata": {}
774
        },
775
        {
776
          "output_type": "display_data",
777
          "data": {
778
            "text/plain": [
779
              "sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]"
780
            ],
781
            "application/vnd.jupyter.widget-view+json": {
782
              "version_major": 2,
783
              "version_minor": 0,
784
              "model_id": "3791512b6e92422799da4e950c6dde6d"
785
            }
786
          },
787
          "metadata": {}
788
        },
789
        {
790
          "output_type": "display_data",
791
          "data": {
792
            "text/plain": [
793
              "tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]"
794
            ],
795
            "application/vnd.jupyter.widget-view+json": {
796
              "version_major": 2,
797
              "version_minor": 0,
798
              "model_id": "3786b6a492114f58b8c92f904c80b9e2"
799
            }
800
          },
801
          "metadata": {}
802
        }
803
      ],
804
      "source": [
805
        "tokenizer = AutoTokenizer.from_pretrained('xlm-roberta-base')"
806
      ]
807
    },
808
    {
809
      "cell_type": "code",
810
      "execution_count": 31,
811
      "metadata": {
812
        "id": "9TAmhnSSWJJ1"
813
      },
814
      "outputs": [],
815
      "source": [
816
        "# tokenize and align the labels in the dataset\n",
817
        "def tokenize_and_align_labels(sentence, flag = 'I'):\n",
818
        "    \"\"\"\n",
819
        "    Tokenize the sentence and align the labels\n",
820
        "    inputs:\n",
821
        "        sentence: dict, the sentence from the dataset\n",
822
        "        flag: str, the flag to indicate how to deal with the labels for subwords\n",
823
        "            - 'I': use the label of the first subword for all subwords but as intermediate (I-ENT)\n",
824
        "            - 'B': use the label of the first subword for all subwords as beginning (B-ENT)\n",
825
        "            - None: use -100 for subwords\n",
826
        "    outputs:\n",
827
        "        tokenized_sentence: dict, the tokenized sentence now with a field for the labels\n",
828
        "    \"\"\"\n",
829
        "    tokenized_sentence = tokenizer(sentence['tokens'], is_split_into_words=True, truncation=True)\n",
830
        "\n",
831
        "    labels = []\n",
832
        "    for i, labels_s in enumerate(sentence['ner_tags']):\n",
833
        "        word_ids = tokenized_sentence.word_ids(batch_index=i)\n",
834
        "        previous_word_idx = None\n",
835
        "        label_ids = []\n",
836
        "        for word_idx in word_ids:\n",
837
        "            # if the word_idx is None, assign -100\n",
838
        "            if word_idx is None:\n",
839
        "                label_ids.append(-100)\n",
840
        "            # if it is a new word, assign the corresponding label\n",
841
        "            elif word_idx != previous_word_idx:\n",
842
        "                label_ids.append(labels_s[word_idx])\n",
843
        "            # if it is the same word, check the flag to assign\n",
844
        "            else:\n",
845
        "                if flag == 'I':\n",
846
        "                    if entities_list[labels_s[word_idx]].startswith('I'):\n",
847
        "                      label_ids.append(labels_s[word_idx])\n",
848
        "                    else:\n",
849
        "                      label_ids.append(labels_s[word_idx] + 1)\n",
850
        "                elif flag == 'B':\n",
851
        "                    label_ids.append(labels_s[word_idx])\n",
852
        "                elif flag == None:\n",
853
        "                    label_ids.append(-100)\n",
854
        "            previous_word_idx = word_idx\n",
855
        "        labels.append(label_ids)\n",
856
        "    tokenized_sentence['labels'] = labels\n",
857
        "    return tokenized_sentence"
858
      ]
859
    },
860
    {
861
      "cell_type": "markdown",
862
      "metadata": {
863
        "id": "0KSHfmoLWJJ1"
864
      },
865
      "source": [
866
        "**Standardizing true annotations**"
867
      ]
868
    },
869
    {
870
      "cell_type": "code",
871
      "source": [
872
        "import re"
873
      ],
874
      "metadata": {
875
        "id": "p0OeK1ero68J"
876
      },
877
      "execution_count": 32,
878
      "outputs": []
879
    },
880
    {
881
      "cell_type": "code",
882
      "execution_count": 33,
883
      "metadata": {
884
        "colab": {
885
          "base_uri": "https://localhost:8080/"
886
        },
887
        "id": "OVY071a-WJJ2",
888
        "outputId": "e40e2a96-71e6-45f6-82c6-0c343fdc3f0f"
889
      },
890
      "outputs": [
891
        {
892
          "output_type": "execute_result",
893
          "data": {
894
            "text/plain": [
895
              "1307"
896
            ]
897
          },
898
          "metadata": {},
899
          "execution_count": 33
900
        }
901
      ],
902
      "source": [
903
        "new_true_annotations = []\n",
904
        "for sent in dataset['test']:\n",
905
        "    annotation = []\n",
906
        "    for word, tag in zip(sent['tokens'], sent['ner_tags']):\n",
907
        "        annotation.append((word, entities_list[tag]))\n",
908
        "    new_annotation = []\n",
909
        "    ps = r'(\\.|\\,|\\:|\\;|\\!|\\?|\\-|\\(|\\)|\\[|\\]|\\{|\\}|\\\")'\n",
910
        "    for i,(word, tag) in enumerate(annotation):\n",
911
        "        if re.search(ps, word):\n",
912
        "            # find the ocurrences of the punctuation signs\n",
913
        "            occurrences = re.finditer(ps, word)\n",
914
        "            indexes = [(match.start(), match.end()) for match in occurrences]\n",
915
        "            # create the new tokens\n",
916
        "            last = 0\n",
917
        "            for j, (beg, end) in enumerate(indexes):\n",
918
        "                if beg > last:\n",
919
        "                    new_annotation.append((word[last:beg], tag))\n",
920
        "                if tag != \"O\":\n",
921
        "                    label = f'I-{tag.split(\"-\")[1]}'\n",
922
        "                else:\n",
923
        "                    label = \"O\"\n",
924
        "                if end < len(word) or (i < len(annotation) - 1 and annotation[i+1][1] == label):\n",
925
        "                    new_annotation.append((word[beg:end], label))\n",
926
        "                else:\n",
927
        "                    new_annotation.append((word[beg:end], 'O'))\n",
928
        "                last = end\n",
929
        "            if last < len(word):\n",
930
        "                new_annotation.append((word[last:], label))\n",
931
        "        else:\n",
932
        "            new_annotation.append((word, tag))\n",
933
        "    new_true_annotations.append(new_annotation)\n",
934
        "len(new_true_annotations)"
935
      ]
936
    },
937
    {
938
      "cell_type": "code",
939
      "execution_count": 34,
940
      "metadata": {
941
        "colab": {
942
          "base_uri": "https://localhost:8080/"
943
        },
944
        "id": "K0LaWT56WJJ2",
945
        "outputId": "5d996a00-56dc-421c-d81f-17ad3549330e"
946
      },
947
      "outputs": [
948
        {
949
          "output_type": "execute_result",
950
          "data": {
951
            "text/plain": [
952
              "1307"
953
            ]
954
          },
955
          "metadata": {},
956
          "execution_count": 34
957
        }
958
      ],
959
      "source": [
960
        "true_annotations = []\n",
961
        "for sent in new_true_annotations:\n",
962
        "    dicc_sent = {\"tokens\":[], \"ner_tags\":[]}\n",
963
        "    for word, tag in sent:\n",
964
        "        dicc_sent[\"tokens\"].append(word)\n",
965
        "        dicc_sent[\"ner_tags\"].append(sel_ent[tag])\n",
966
        "    true_annotations.append(dicc_sent)\n",
967
        "len(true_annotations)"
968
      ]
969
    },
970
    {
971
      "cell_type": "code",
972
      "execution_count": 35,
973
      "metadata": {
974
        "id": "FQgSF-gaWJJ2"
975
      },
976
      "outputs": [],
977
      "source": [
978
        "true_df = pd.DataFrame(true_annotations)\n",
979
        "true_ann_dataset = Dataset.from_pandas(true_df)"
980
      ]
981
    },
982
    {
983
      "cell_type": "code",
984
      "execution_count": 36,
985
      "metadata": {
986
        "colab": {
987
          "base_uri": "https://localhost:8080/",
988
          "height": 49,
989
          "referenced_widgets": [
990
            "cabdca2ad56e43eaa506e4588087c3f2",
991
            "116d6d8de1af401a971f3da77cb4b916",
992
            "5644e8831133414dbb54e899f1253e8b",
993
            "75cff91a18654513bc53045df04832f6",
994
            "883ee6d5131443eba2c4598c9fa0223e",
995
            "5454af7e0b964f388ecab1f83f74ecc9",
996
            "9f62543c61b04183b7784826e9178321",
997
            "dd31f0ed6eee4b6a97d4c9ee6addabe3",
998
            "9f072c7c1f7646ef876b49917e1eccff",
999
            "76f09404ea3d447782bb09cd27e9a723",
1000
            "f2356694803b4c678de2ca0a11cb11e2"
1001
          ]
1002
        },
1003
        "id": "5aO9u8ceWJJ2",
1004
        "outputId": "6f999f1c-db84-4839-e645-aa81650f4168"
1005
      },
1006
      "outputs": [
1007
        {
1008
          "output_type": "display_data",
1009
          "data": {
1010
            "text/plain": [
1011
              "Map:   0%|          | 0/1307 [00:00<?, ? examples/s]"
1012
            ],
1013
            "application/vnd.jupyter.widget-view+json": {
1014
              "version_major": 2,
1015
              "version_minor": 0,
1016
              "model_id": "cabdca2ad56e43eaa506e4588087c3f2"
1017
            }
1018
          },
1019
          "metadata": {}
1020
        }
1021
      ],
1022
      "source": [
1023
        "true_ann_dataset = true_ann_dataset.map(tokenize_and_align_labels, batched=True)"
1024
      ]
1025
    },
1026
    {
1027
      "cell_type": "code",
1028
      "source": [
1029
        "true_ann_dataset['ner_tags'][0]"
1030
      ],
1031
      "metadata": {
1032
        "colab": {
1033
          "base_uri": "https://localhost:8080/"
1034
        },
1035
        "id": "D2eYI1XdpirH",
1036
        "outputId": "a65ef39b-f749-4ece-86e9-2ba03f39e4fe"
1037
      },
1038
      "execution_count": 37,
1039
      "outputs": [
1040
        {
1041
          "output_type": "execute_result",
1042
          "data": {
1043
            "text/plain": [
1044
              "[0, 0, 0, 1, 15, 3, 4, 4, 4, 4, 4, 4, 0, 0, 13, 14, 14, 0]"
1045
            ]
1046
          },
1047
          "metadata": {},
1048
          "execution_count": 37
1049
        }
1050
      ]
1051
    },
1052
    {
1053
      "cell_type": "markdown",
1054
      "metadata": {
1055
        "id": "VMMWntNGWJJ2"
1056
      },
1057
      "source": [
1058
        "**Evaluating prompt 1**"
1059
      ]
1060
    },
1061
    {
1062
      "cell_type": "code",
1063
      "execution_count": 121,
1064
      "metadata": {
1065
        "id": "bTLXGyNwWJJ2"
1066
      },
1067
      "outputs": [],
1068
      "source": [
1069
        "new_p1_annotations = []\n",
1070
        "for sent in generated_sentences_p1:\n",
1071
        "    annotation = []\n",
1072
        "    for line in sent.split('\\n')[2:]:\n",
1073
        "        if line != '':\n",
1074
        "            splited_line = line.split()\n",
1075
        "            if len(splited_line) > 2:\n",
1076
        "                splited_line = [' '.join(splited_line[:-1]), splited_line[-1]]\n",
1077
        "            if len(splited_line) != 2:\n",
1078
        "                continue\n",
1079
        "            word, tag = splited_line\n",
1080
        "            annotation.append((word, tag))\n",
1081
        "    new_annotation = []\n",
1082
        "    ps = r'(\\.|\\,|\\:|\\;|\\!|\\?|\\-|\\(|\\)|\\[|\\]|\\{|\\}|\\\")'\n",
1083
        "    for i,(word, tag) in enumerate(annotation):\n",
1084
        "        if tag not in sel_ent:\n",
1085
        "            tag = \"O\"\n",
1086
        "        if re.search(ps, word):\n",
1087
        "            # find the ocurrences of the punctuation signs\n",
1088
        "            occurrences = re.finditer(ps, word)\n",
1089
        "            indexes = [(match.start(), match.end()) for match in occurrences]\n",
1090
        "            # create the new tokens\n",
1091
        "            last = 0\n",
1092
        "            for j, (beg, end) in enumerate(indexes):\n",
1093
        "                if beg > last:\n",
1094
        "                    new_annotation.append((word[last:beg], tag))\n",
1095
        "                if tag != \"O\":\n",
1096
        "                    if not tag.startswith('I') and not tag.startswith('B'):\n",
1097
        "                        label = \"O\"\n",
1098
        "                    else:\n",
1099
        "                        label = f'I-{tag.split(\"-\")[1]}'\n",
1100
        "                else:\n",
1101
        "                    label = \"O\"\n",
1102
        "                if end < len(word) or (i < len(annotation) - 1 and annotation[i+1][1] == label):\n",
1103
        "                    new_annotation.append((word[beg:end], label))\n",
1104
        "                else:\n",
1105
        "                    new_annotation.append((word[beg:end], 'O'))\n",
1106
        "                last = end\n",
1107
        "            if last < len(word):\n",
1108
        "                new_annotation.append((word[last:], label))\n",
1109
        "        else:\n",
1110
        "            new_annotation.append((word, tag))\n",
1111
        "    new_p1_annotations.append(new_annotation)\n",
1112
        "len(new_p1_annotations)"
1113
      ]
1114
    },
1115
    {
1116
      "cell_type": "code",
1117
      "execution_count": 73,
1118
      "metadata": {
1119
        "colab": {
1120
          "base_uri": "https://localhost:8080/"
1121
        },
1122
        "id": "nICoEVoIWJJ3",
1123
        "outputId": "7d65b295-6d8a-4cc1-b751-384433a1c7ea"
1124
      },
1125
      "outputs": [
1126
        {
1127
          "output_type": "execute_result",
1128
          "data": {
1129
            "text/plain": [
1130
              "50"
1131
            ]
1132
          },
1133
          "metadata": {},
1134
          "execution_count": 73
1135
        }
1136
      ],
1137
      "source": [
1138
        "p1_annotations = []\n",
1139
        "for sent in new_p1_annotations:\n",
1140
        "    dicc_sent = {\"tokens\":[], \"ner_tags\":[]}\n",
1141
        "    for word, tag in sent:\n",
1142
        "        dicc_sent[\"tokens\"].append(word)\n",
1143
        "        dicc_sent[\"ner_tags\"].append(sel_ent[tag])\n",
1144
        "    p1_annotations.append(dicc_sent)\n",
1145
        "len(p1_annotations)"
1146
      ]
1147
    },
1148
    {
1149
      "cell_type": "code",
1150
      "source": [
1151
        "p1_annotations"
1152
      ],
1153
      "metadata": {
1154
        "colab": {
1155
          "base_uri": "https://localhost:8080/"
1156
        },
1157
        "id": "rp1aH-fztMQn",
1158
        "outputId": "fbb92320-c587-438d-feb1-17b85d78cd42"
1159
      },
1160
      "execution_count": 74,
1161
      "outputs": [
1162
        {
1163
          "output_type": "execute_result",
1164
          "data": {
1165
            "text/plain": [
1166
              "[{'tokens': ['self',\n",
1167
              "   '-',\n",
1168
              "   'reported',\n",
1169
              "   'healthy',\n",
1170
              "   'adults',\n",
1171
              "   'between',\n",
1172
              "   'the',\n",
1173
              "   'ages',\n",
1174
              "   'of',\n",
1175
              "   '18',\n",
1176
              "   '-',\n",
1177
              "   '60',\n",
1178
              "   'who',\n",
1179
              "   'are',\n",
1180
              "   'fluent',\n",
1181
              "   'in',\n",
1182
              "   'English',\n",
1183
              "   '.'],\n",
1184
              "  'ner_tags': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]},\n",
1185
              " {'tokens': ['Treatment',\n",
1186
              "   'with',\n",
1187
              "   'any',\n",
1188
              "   'investigational',\n",
1189
              "   'drug',\n",
1190
              "   'within',\n",
1191
              "   '30',\n",
1192
              "   'days',\n",
1193
              "   'of',\n",
1194
              "   'entry',\n",
1195
              "   'to',\n",
1196
              "   'this',\n",
1197
              "   'protocol'],\n",
1198
              "  'ner_tags': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]},\n",
1199
              " {'tokens': ['Current', 'treatment', 'with', 'Telbivudine'],\n",
1200
              "  'ner_tags': [0, 0, 0, 5]},\n",
1201
              " {'tokens': ['Severe',\n",
1202
              "   'hepatitis',\n",
1203
              "   'activity',\n",
1204
              "   'as',\n",
1205
              "   'documented',\n",
1206
              "   'by',\n",
1207
              "   'ALT>10 x ULN'],\n",
1208
              "  'ner_tags': [0, 0, 0, 0, 0, 0, 0]},\n",
1209
              " {'tokens': ['History',\n",
1210
              "   'of',\n",
1211
              "   'decompensated',\n",
1212
              "   'cirrhosis',\n",
1213
              "   '(',\n",
1214
              "   'defined',\n",
1215
              "   'as',\n",
1216
              "   'jaundice',\n",
1217
              "   'in',\n",
1218
              "   'the presence',\n",
1219
              "   'of',\n",
1220
              "   'cirrhosis',\n",
1221
              "   ',',\n",
1222
              "   'ascites',\n",
1223
              "   ',',\n",
1224
              "   'bleeding',\n",
1225
              "   'gastric',\n",
1226
              "   'or',\n",
1227
              "   'esophageal',\n",
1228
              "   'varices',\n",
1229
              "   'or',\n",
1230
              "   'encephalopathy',\n",
1231
              "   ')'],\n",
1232
              "  'ner_tags': [0,\n",
1233
              "   0,\n",
1234
              "   0,\n",
1235
              "   1,\n",
1236
              "   0,\n",
1237
              "   0,\n",
1238
              "   0,\n",
1239
              "   0,\n",
1240
              "   0,\n",
1241
              "   0,\n",
1242
              "   0,\n",
1243
              "   0,\n",
1244
              "   0,\n",
1245
              "   0,\n",
1246
              "   0,\n",
1247
              "   0,\n",
1248
              "   0,\n",
1249
              "   0,\n",
1250
              "   0,\n",
1251
              "   0,\n",
1252
              "   0,\n",
1253
              "   0,\n",
1254
              "   0]},\n",
1255
              " {'tokens': ['Pre',\n",
1256
              "   '-',\n",
1257
              "   ' existent',\n",
1258
              "   'neutropenia',\n",
1259
              "   '(',\n",
1260
              "   'neutrophils',\n",
1261
              "   '< 1',\n",
1262
              "   ',',\n",
1263
              "   ' 500 / mm3',\n",
1264
              "   ')',\n",
1265
              "   'or',\n",
1266
              "   'thrombocytopenia',\n",
1267
              "   '(',\n",
1268
              "   'platelets',\n",
1269
              "   '< 90',\n",
1270
              "   ',',\n",
1271
              "   ' 000 / mm3',\n",
1272
              "   ')'],\n",
1273
              "  'ner_tags': [0, 0, 0, 1, 0, 2, 4, 4, 4, 0, 0, 1, 0, 2, 4, 4, 4, 0]},\n",
1274
              " {'tokens': ['Co ',\n",
1275
              "   '-',\n",
1276
              "   ' infection',\n",
1277
              "   'with',\n",
1278
              "   'hepatitis C virus',\n",
1279
              "   ',',\n",
1280
              "   'hepatitis D virus',\n",
1281
              "   'or',\n",
1282
              "   'human immunodeficiency virus',\n",
1283
              "   '(',\n",
1284
              "   ' HIV ',\n",
1285
              "   ')'],\n",
1286
              "  'ner_tags': [5, 6, 6, 0, 6, 0, 6, 0, 6, 6, 6, 0]},\n",
1287
              " {'tokens': ['Other',\n",
1288
              "   'acquired',\n",
1289
              "   'or',\n",
1290
              "   'inherited',\n",
1291
              "   'causes',\n",
1292
              "   'of',\n",
1293
              "   'liver',\n",
1294
              "   'disease',\n",
1295
              "   ':',\n",
1296
              "   'alcoholic',\n",
1297
              "   'liver',\n",
1298
              "   'disease',\n",
1299
              "   ',',\n",
1300
              "   'obesity',\n",
1301
              "   'induced',\n",
1302
              "   'liver',\n",
1303
              "   'disease',\n",
1304
              "   ',',\n",
1305
              "   'drug',\n",
1306
              "   'related',\n",
1307
              "   'liver',\n",
1308
              "   'disease',\n",
1309
              "   ',',\n",
1310
              "   'auto ',\n",
1311
              "   '-',\n",
1312
              "   ' immune',\n",
1313
              "   'hepatitis',\n",
1314
              "   ',',\n",
1315
              "   'hemochromatosis',\n",
1316
              "   ',',\n",
1317
              "   \"Wilson's\",\n",
1318
              "   'disease',\n",
1319
              "   'or',\n",
1320
              "   'alpha ',\n",
1321
              "   '-',\n",
1322
              "   ' 1',\n",
1323
              "   'antitrypsin',\n",
1324
              "   'deficiency'],\n",
1325
              "  'ner_tags': [0,\n",
1326
              "   0,\n",
1327
              "   0,\n",
1328
              "   0,\n",
1329
              "   0,\n",
1330
              "   0,\n",
1331
              "   0,\n",
1332
              "   0,\n",
1333
              "   0,\n",
1334
              "   0,\n",
1335
              "   0,\n",
1336
              "   0,\n",
1337
              "   0,\n",
1338
              "   0,\n",
1339
              "   0,\n",
1340
              "   0,\n",
1341
              "   0,\n",
1342
              "   0,\n",
1343
              "   0,\n",
1344
              "   0,\n",
1345
              "   0,\n",
1346
              "   0,\n",
1347
              "   0,\n",
1348
              "   0,\n",
1349
              "   0,\n",
1350
              "   0,\n",
1351
              "   0,\n",
1352
              "   0,\n",
1353
              "   0,\n",
1354
              "   0,\n",
1355
              "   0,\n",
1356
              "   0,\n",
1357
              "   0,\n",
1358
              "   0,\n",
1359
              "   0,\n",
1360
              "   0,\n",
1361
              "   0,\n",
1362
              "   0]},\n",
1363
              " {'tokens': ['Alpha fetoprotein', '>', '50', 'ng / ml'],\n",
1364
              "  'ner_tags': [3, 4, 4, 4]},\n",
1365
              " {'tokens': ['Hyper ',\n",
1366
              "   '-',\n",
1367
              "   ' or hypothyroidism',\n",
1368
              "   '(',\n",
1369
              "   ' subjects requiring medication to maintain TSH levels in the normal range are eligible if all other inclusion / exclusion criteria are met ',\n",
1370
              "   ')'],\n",
1371
              "  'ner_tags': [1, 2, 2, 0, 0, 0]},\n",
1372
              " {'tokens': ['Immune',\n",
1373
              "   'suppressive',\n",
1374
              "   'treatment',\n",
1375
              "   'within',\n",
1376
              "   'the',\n",
1377
              "   'previous',\n",
1378
              "   'B',\n",
1379
              "   '-',\n",
1380
              "   '6 months I',\n",
1381
              "   '-',\n",
1382
              "   '6',\n",
1383
              "   'months'],\n",
1384
              "  'ner_tags': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]},\n",
1385
              " {'tokens': ['Contra',\n",
1386
              "   '-',\n",
1387
              "   'indications',\n",
1388
              "   'for',\n",
1389
              "   'alfa',\n",
1390
              "   '-',\n",
1391
              "   'interferon',\n",
1392
              "   'therapy',\n",
1393
              "   'like',\n",
1394
              "   'suspected',\n",
1395
              "   'hypersensitivity',\n",
1396
              "   'to',\n",
1397
              "   'interferon',\n",
1398
              "   'or',\n",
1399
              "   'Peginterferon',\n",
1400
              "   'or',\n",
1401
              "   'any',\n",
1402
              "   'known',\n",
1403
              "   'pre ',\n",
1404
              "   '-',\n",
1405
              "   ' existing',\n",
1406
              "   'medical',\n",
1407
              "   'condition',\n",
1408
              "   'that',\n",
1409
              "   'could',\n",
1410
              "   'interfere',\n",
1411
              "   'with',\n",
1412
              "   'the',\n",
1413
              "   \"patient's\",\n",
1414
              "   'participation',\n",
1415
              "   'in',\n",
1416
              "   'and',\n",
1417
              "   'completion',\n",
1418
              "   'of',\n",
1419
              "   'the',\n",
1420
              "   'study',\n",
1421
              "   '.'],\n",
1422
              "  'ner_tags': [0,\n",
1423
              "   0,\n",
1424
              "   0,\n",
1425
              "   0,\n",
1426
              "   0,\n",
1427
              "   0,\n",
1428
              "   0,\n",
1429
              "   0,\n",
1430
              "   0,\n",
1431
              "   0,\n",
1432
              "   0,\n",
1433
              "   0,\n",
1434
              "   0,\n",
1435
              "   0,\n",
1436
              "   0,\n",
1437
              "   0,\n",
1438
              "   0,\n",
1439
              "   0,\n",
1440
              "   0,\n",
1441
              "   0,\n",
1442
              "   0,\n",
1443
              "   0,\n",
1444
              "   0,\n",
1445
              "   0,\n",
1446
              "   0,\n",
1447
              "   0,\n",
1448
              "   0,\n",
1449
              "   0,\n",
1450
              "   0,\n",
1451
              "   0,\n",
1452
              "   0,\n",
1453
              "   0,\n",
1454
              "   0,\n",
1455
              "   0,\n",
1456
              "   0,\n",
1457
              "   0,\n",
1458
              "   0]},\n",
1459
              " {'tokens': ['Pregnancy', ',', ' breast ', '-', ' feeding', 'considerations'],\n",
1460
              "  'ner_tags': [0, 0, 0, 0, 0, 0]},\n",
1461
              " {'tokens': [], 'ner_tags': []},\n",
1462
              " {'tokens': ['Any',\n",
1463
              "   'medical',\n",
1464
              "   'condition',\n",
1465
              "   'requiring',\n",
1466
              "   ',',\n",
1467
              "   'or',\n",
1468
              "   'likely',\n",
1469
              "   'to',\n",
1470
              "   'require',\n",
1471
              "   'chronic',\n",
1472
              "   'systemic',\n",
1473
              "   'administration',\n",
1474
              "   'of',\n",
1475
              "   'steroids',\n",
1476
              "   ',',\n",
1477
              "   'during',\n",
1478
              "   'the',\n",
1479
              "   'course',\n",
1480
              "   'of',\n",
1481
              "   'the',\n",
1482
              "   'study'],\n",
1483
              "  'ner_tags': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]},\n",
1484
              " {'tokens': ['Substance',\n",
1485
              "   'abuse',\n",
1486
              "   ',',\n",
1487
              "   'such as',\n",
1488
              "   'alcohol',\n",
1489
              "   '(',\n",
1490
              "   ' > 80 g / day ',\n",
1491
              "   ')',\n",
1492
              "   ',',\n",
1493
              "   'I',\n",
1494
              "   '.',\n",
1495
              "   ' V',\n",
1496
              "   '.',\n",
1497
              "   'drugs',\n",
1498
              "   'and',\n",
1499
              "   'inhaled',\n",
1500
              "   'drugs',\n",
1501
              "   'in the past 2 years',\n",
1502
              "   '.'],\n",
1503
              "  'ner_tags': [5, 6, 0, 0, 5, 6, 6, 0, 0, 5, 6, 5, 6, 6, 0, 5, 6, 6, 0]},\n",
1504
              " {'tokens': ['Any',\n",
1505
              "   'other',\n",
1506
              "   'condition',\n",
1507
              "   'which',\n",
1508
              "   'in',\n",
1509
              "   'the',\n",
1510
              "   'opinion',\n",
1511
              "   'of',\n",
1512
              "   'the',\n",
1513
              "   'investigator',\n",
1514
              "   'would',\n",
1515
              "   'make',\n",
1516
              "   'the',\n",
1517
              "   'patient',\n",
1518
              "   'unsuitable',\n",
1519
              "   'for',\n",
1520
              "   'enrollment',\n",
1521
              "   ',',\n",
1522
              "   'or',\n",
1523
              "   'could',\n",
1524
              "   'interfere',\n",
1525
              "   'with',\n",
1526
              "   'the',\n",
1527
              "   'patient',\n",
1528
              "   'participating',\n",
1529
              "   'in',\n",
1530
              "   'and',\n",
1531
              "   'completing',\n",
1532
              "   'the',\n",
1533
              "   'study'],\n",
1534
              "  'ner_tags': [0,\n",
1535
              "   0,\n",
1536
              "   0,\n",
1537
              "   0,\n",
1538
              "   0,\n",
1539
              "   0,\n",
1540
              "   0,\n",
1541
              "   0,\n",
1542
              "   0,\n",
1543
              "   0,\n",
1544
              "   0,\n",
1545
              "   0,\n",
1546
              "   0,\n",
1547
              "   0,\n",
1548
              "   0,\n",
1549
              "   0,\n",
1550
              "   0,\n",
1551
              "   0,\n",
1552
              "   0,\n",
1553
              "   0,\n",
1554
              "   0,\n",
1555
              "   0,\n",
1556
              "   0,\n",
1557
              "   0,\n",
1558
              "   0,\n",
1559
              "   0,\n",
1560
              "   0,\n",
1561
              "   0,\n",
1562
              "   0,\n",
1563
              "   0]},\n",
1564
              " {'tokens': ['Patients', 'undergoing', 'urologic', 'surgery'],\n",
1565
              "  'ner_tags': [0, 0, 7, 8]},\n",
1566
              " {'tokens': ['Pregnancy',\n",
1567
              "   ',',\n",
1568
              "   'coagulopathy',\n",
1569
              "   ',',\n",
1570
              "   'allergy to bupivacaine',\n",
1571
              "   ',',\n",
1572
              "   'renal failure',\n",
1573
              "   ',',\n",
1574
              "   'hepatic insufficiency',\n",
1575
              "   ',',\n",
1576
              "   'and/or',\n",
1577
              "   'inappropriate candidate for usual therapy',\n",
1578
              "   '(',\n",
1579
              "   'specifically',\n",
1580
              "   ',',\n",
1581
              "   'if unable to receive the usual preoperative interscalene nerve block',\n",
1582
              "   ':',\n",
1583
              "   'preexisting nerve injury on side of surgery',\n",
1584
              "   ',',\n",
1585
              "   'refusal of nerve block',\n",
1586
              "   ',',\n",
1587
              "   'infection at site of nerve block',\n",
1588
              "   ')',\n",
1589
              "   '.'],\n",
1590
              "  'ner_tags': [0,\n",
1591
              "   0,\n",
1592
              "   0,\n",
1593
              "   0,\n",
1594
              "   0,\n",
1595
              "   0,\n",
1596
              "   0,\n",
1597
              "   0,\n",
1598
              "   0,\n",
1599
              "   0,\n",
1600
              "   0,\n",
1601
              "   0,\n",
1602
              "   0,\n",
1603
              "   0,\n",
1604
              "   0,\n",
1605
              "   0,\n",
1606
              "   0,\n",
1607
              "   0,\n",
1608
              "   0,\n",
1609
              "   0,\n",
1610
              "   0,\n",
1611
              "   0,\n",
1612
              "   0,\n",
1613
              "   0]},\n",
1614
              " {'tokens': ['Pregnant women',\n",
1615
              "   'with',\n",
1616
              "   'APS',\n",
1617
              "   'diagnosed',\n",
1618
              "   'according to',\n",
1619
              "   'the revised',\n",
1620
              "   'classification',\n",
1621
              "   'criteria',\n",
1622
              "   'for',\n",
1623
              "   'APS',\n",
1624
              "   'in',\n",
1625
              "   'Sydney',\n",
1626
              "   ',',\n",
1627
              "   'Australia'],\n",
1628
              "  'ner_tags': [0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 2, 2]},\n",
1629
              " {'tokens': ['Early',\n",
1630
              "   'pregnancy',\n",
1631
              "   'body',\n",
1632
              "   'weight',\n",
1633
              "   'is',\n",
1634
              "   '50 ',\n",
1635
              "   '-',\n",
1636
              "   ' 90 Kg'],\n",
1637
              "  'ner_tags': [0, 0, 0, 0, 0, 0, 0, 0]},\n",
1638
              " {'tokens': ['On',\n",
1639
              "   'chronic',\n",
1640
              "   'treatment',\n",
1641
              "   '(',\n",
1642
              "   'i',\n",
1643
              "   '.',\n",
1644
              "   ' e',\n",
1645
              "   '.',\n",
1646
              "   ',',\n",
1647
              "   'two weeks',\n",
1648
              "   'or',\n",
1649
              "   'more',\n",
1650
              "   ')',\n",
1651
              "   'with',\n",
1652
              "   'any',\n",
1653
              "   'medication',\n",
1654
              "   'severely',\n",
1655
              "   'affecting',\n",
1656
              "   'oral',\n",
1657
              "   'status',\n",
1658
              "   '(',\n",
1659
              "   'e',\n",
1660
              "   '.',\n",
1661
              "   ' g',\n",
1662
              "   '.',\n",
1663
              "   'participants',\n",
1664
              "   'with',\n",
1665
              "   'gingival',\n",
1666
              "   'hypertrophy',\n",
1667
              "   'caused',\n",
1668
              "   'by',\n",
1669
              "   'anti ',\n",
1670
              "   '-',\n",
1671
              "   ' epileptics',\n",
1672
              "   ',',\n",
1673
              "   'calcium',\n",
1674
              "   'antagonists',\n",
1675
              "   ',',\n",
1676
              "   'cyclosporine',\n",
1677
              "   'and',\n",
1678
              "   'other',\n",
1679
              "   'immunosuppressive',\n",
1680
              "   ')',\n",
1681
              "   'or',\n",
1682
              "   'bone',\n",
1683
              "   'metabolism',\n",
1684
              "   '(',\n",
1685
              "   'e',\n",
1686
              "   '.',\n",
1687
              "   ' g',\n",
1688
              "   '.',\n",
1689
              "   'anticoagulant',\n",
1690
              "   'medications',\n",
1691
              "   ',',\n",
1692
              "   'long ',\n",
1693
              "   '-',\n",
1694
              "   ' standing',\n",
1695
              "   'steroid',\n",
1696
              "   'medications',\n",
1697
              "   '-',\n",
1698
              "   ' i',\n",
1699
              "   '.',\n",
1700
              "   ' e',\n",
1701
              "   '.',\n",
1702
              "   'equal',\n",
1703
              "   'or',\n",
1704
              "   'more',\n",
1705
              "   '2',\n",
1706
              "   '.',\n",
1707
              "   ' 5',\n",
1708
              "   'mg',\n",
1709
              "   'of',\n",
1710
              "   'prednisolone',\n",
1711
              "   'a',\n",
1712
              "   'day',\n",
1713
              "   'taken',\n",
1714
              "   'for',\n",
1715
              "   '> 3',\n",
1716
              "   'months',\n",
1717
              "   '-',\n",
1718
              "   ',',\n",
1719
              "   'anticonvulsants',\n",
1720
              "   ',',\n",
1721
              "   'immunosuppressants',\n",
1722
              "   ')'],\n",
1723
              "  'ner_tags': [0,\n",
1724
              "   0,\n",
1725
              "   0,\n",
1726
              "   0,\n",
1727
              "   0,\n",
1728
              "   0,\n",
1729
              "   0,\n",
1730
              "   0,\n",
1731
              "   0,\n",
1732
              "   0,\n",
1733
              "   0,\n",
1734
              "   0,\n",
1735
              "   0,\n",
1736
              "   0,\n",
1737
              "   0,\n",
1738
              "   0,\n",
1739
              "   0,\n",
1740
              "   0,\n",
1741
              "   0,\n",
1742
              "   0,\n",
1743
              "   0,\n",
1744
              "   0,\n",
1745
              "   0,\n",
1746
              "   0,\n",
1747
              "   0,\n",
1748
              "   0,\n",
1749
              "   0,\n",
1750
              "   0,\n",
1751
              "   0,\n",
1752
              "   0,\n",
1753
              "   0,\n",
1754
              "   0,\n",
1755
              "   0,\n",
1756
              "   0,\n",
1757
              "   0,\n",
1758
              "   0,\n",
1759
              "   0,\n",
1760
              "   0,\n",
1761
              "   0,\n",
1762
              "   0,\n",
1763
              "   0,\n",
1764
              "   0,\n",
1765
              "   0,\n",
1766
              "   0,\n",
1767
              "   0,\n",
1768
              "   0,\n",
1769
              "   0,\n",
1770
              "   0,\n",
1771
              "   0,\n",
1772
              "   0,\n",
1773
              "   0,\n",
1774
              "   0,\n",
1775
              "   0,\n",
1776
              "   0,\n",
1777
              "   0,\n",
1778
              "   0,\n",
1779
              "   0,\n",
1780
              "   0,\n",
1781
              "   0,\n",
1782
              "   0,\n",
1783
              "   0,\n",
1784
              "   0,\n",
1785
              "   0,\n",
1786
              "   0,\n",
1787
              "   0,\n",
1788
              "   0,\n",
1789
              "   0,\n",
1790
              "   0,\n",
1791
              "   0,\n",
1792
              "   0,\n",
1793
              "   0,\n",
1794
              "   0,\n",
1795
              "   0,\n",
1796
              "   0,\n",
1797
              "   0,\n",
1798
              "   0,\n",
1799
              "   0,\n",
1800
              "   0,\n",
1801
              "   0,\n",
1802
              "   0,\n",
1803
              "   0,\n",
1804
              "   0,\n",
1805
              "   0,\n",
1806
              "   0,\n",
1807
              "   0]},\n",
1808
              " {'tokens': ['Affected',\n",
1809
              "   'by',\n",
1810
              "   'systemic',\n",
1811
              "   'diseases',\n",
1812
              "   'recognized',\n",
1813
              "   'to',\n",
1814
              "   'severely',\n",
1815
              "   'affect',\n",
1816
              "   'bone',\n",
1817
              "   'metabolism',\n",
1818
              "   '(',\n",
1819
              "   'e',\n",
1820
              "   '.',\n",
1821
              "   'g',\n",
1822
              "   '.',\n",
1823
              "   \"Cushing's\",\n",
1824
              "   'syndrome',\n",
1825
              "   ',',\n",
1826
              "   \"Addison's\",\n",
1827
              "   'disease',\n",
1828
              "   ',',\n",
1829
              "   'diabetes',\n",
1830
              "   'mellitus',\n",
1831
              "   'type 1',\n",
1832
              "   ',',\n",
1833
              "   'leukaemia',\n",
1834
              "   ',',\n",
1835
              "   'pernicious',\n",
1836
              "   'anaemia',\n",
1837
              "   ',',\n",
1838
              "   'malabsorption',\n",
1839
              "   'syndromes',\n",
1840
              "   ',',\n",
1841
              "   'chronic',\n",
1842
              "   'liver',\n",
1843
              "   'disease',\n",
1844
              "   ',',\n",
1845
              "   'rheumatoid',\n",
1846
              "   'arthritis',\n",
1847
              "   ')',\n",
1848
              "   '.'],\n",
1849
              "  'ner_tags': [0,\n",
1850
              "   0,\n",
1851
              "   0,\n",
1852
              "   0,\n",
1853
              "   0,\n",
1854
              "   0,\n",
1855
              "   0,\n",
1856
              "   0,\n",
1857
              "   0,\n",
1858
              "   0,\n",
1859
              "   0,\n",
1860
              "   0,\n",
1861
              "   0,\n",
1862
              "   0,\n",
1863
              "   0,\n",
1864
              "   0,\n",
1865
              "   0,\n",
1866
              "   0,\n",
1867
              "   0,\n",
1868
              "   0,\n",
1869
              "   0,\n",
1870
              "   0,\n",
1871
              "   0,\n",
1872
              "   0,\n",
1873
              "   0,\n",
1874
              "   0,\n",
1875
              "   0,\n",
1876
              "   0,\n",
1877
              "   0,\n",
1878
              "   0,\n",
1879
              "   0,\n",
1880
              "   0,\n",
1881
              "   0,\n",
1882
              "   0,\n",
1883
              "   0,\n",
1884
              "   0,\n",
1885
              "   0,\n",
1886
              "   0,\n",
1887
              "   0,\n",
1888
              "   0,\n",
1889
              "   0]},\n",
1890
              " {'tokens': ['Knowingly', 'affected by', 'HIV', 'or', 'Hepatitis', '.'],\n",
1891
              "  'ner_tags': [0, 0, 0, 0, 0, 0]},\n",
1892
              " {'tokens': ['History',\n",
1893
              "   'of',\n",
1894
              "   'local',\n",
1895
              "   'radiation',\n",
1896
              "   'therapy',\n",
1897
              "   'in',\n",
1898
              "   'the',\n",
1899
              "   'last',\n",
1900
              "   'five',\n",
1901
              "   'years',\n",
1902
              "   '.'],\n",
1903
              "  'ner_tags': [0, 0, 5, 5, 6, 0, 11, 11, 11, 12, 0]},\n",
1904
              " {'tokens': ['Affected',\n",
1905
              "   'by',\n",
1906
              "   'limited',\n",
1907
              "   'mental',\n",
1908
              "   'capacity',\n",
1909
              "   'or',\n",
1910
              "   'language',\n",
1911
              "   'skills',\n",
1912
              "   'such that',\n",
1913
              "   'study',\n",
1914
              "   'information',\n",
1915
              "   'can not',\n",
1916
              "   'be',\n",
1917
              "   'understood',\n",
1918
              "   ',',\n",
1919
              "   'informed',\n",
1920
              "   'consent',\n",
1921
              "   'can not',\n",
1922
              "   'be',\n",
1923
              "   'obtained',\n",
1924
              "   ',',\n",
1925
              "   'or',\n",
1926
              "   'simple',\n",
1927
              "   'instructions',\n",
1928
              "   'can not',\n",
1929
              "   'be',\n",
1930
              "   'followed',\n",
1931
              "   '.'],\n",
1932
              "  'ner_tags': [15,\n",
1933
              "   0,\n",
1934
              "   0,\n",
1935
              "   0,\n",
1936
              "   0,\n",
1937
              "   0,\n",
1938
              "   0,\n",
1939
              "   0,\n",
1940
              "   0,\n",
1941
              "   0,\n",
1942
              "   0,\n",
1943
              "   0,\n",
1944
              "   0,\n",
1945
              "   0,\n",
1946
              "   0,\n",
1947
              "   0,\n",
1948
              "   0,\n",
1949
              "   0,\n",
1950
              "   0,\n",
1951
              "   0,\n",
1952
              "   0,\n",
1953
              "   0,\n",
1954
              "   0,\n",
1955
              "   0,\n",
1956
              "   0,\n",
1957
              "   0,\n",
1958
              "   0,\n",
1959
              "   0]},\n",
1960
              " {'tokens': ['Presenting',\n",
1961
              "   'an',\n",
1962
              "   'acute',\n",
1963
              "   'endodontic',\n",
1964
              "   '/',\n",
1965
              "   'periodontal',\n",
1966
              "   'lesion',\n",
1967
              "   'in',\n",
1968
              "   'the',\n",
1969
              "   'neighboring',\n",
1970
              "   'areas',\n",
1971
              "   'to',\n",
1972
              "   'the',\n",
1973
              "   'implant',\n",
1974
              "   'site',\n",
1975
              "   '.'],\n",
1976
              "  'ner_tags': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]},\n",
1977
              " {'tokens': ['Completely', 'edentulous', 'patients'], 'ner_tags': [0, 0, 0]},\n",
1978
              " {'tokens': ['With',\n",
1979
              "   'evident',\n",
1980
              "   'severe',\n",
1981
              "   'atrophy',\n",
1982
              "   'of',\n",
1983
              "   'the',\n",
1984
              "   'alveolar',\n",
1985
              "   'ridge',\n",
1986
              "   'that',\n",
1987
              "   'could',\n",
1988
              "   'preclude',\n",
1989
              "   'an',\n",
1990
              "   'implant',\n",
1991
              "   'placement',\n",
1992
              "   '(',\n",
1993
              "   'e',\n",
1994
              "   '.',\n",
1995
              "   'g',\n",
1996
              "   '.',\n",
1997
              "   'sharp',\n",
1998
              "   'knife',\n",
1999
              "   'edge',\n",
2000
              "   'ridge',\n",
2001
              "   ')'],\n",
2002
              "  'ner_tags': [0,\n",
2003
              "   0,\n",
2004
              "   0,\n",
2005
              "   0,\n",
2006
              "   0,\n",
2007
              "   0,\n",
2008
              "   0,\n",
2009
              "   0,\n",
2010
              "   0,\n",
2011
              "   0,\n",
2012
              "   0,\n",
2013
              "   0,\n",
2014
              "   0,\n",
2015
              "   0,\n",
2016
              "   0,\n",
2017
              "   0,\n",
2018
              "   0,\n",
2019
              "   0,\n",
2020
              "   0,\n",
2021
              "   0,\n",
2022
              "   0,\n",
2023
              "   0,\n",
2024
              "   0,\n",
2025
              "   0]},\n",
2026
              " {'tokens': ['Severe', 'bruxism', 'or', 'clenching', 'habits'],\n",
2027
              "  'ner_tags': [0, 0, 0, 0, 0]},\n",
2028
              " {'tokens': ['Smokers', 'of', '>', '5', 'cigarettes', 'a day'],\n",
2029
              "  'ner_tags': [0, 0, 0, 0, 0, 0]},\n",
2030
              " {'tokens': ['A daily', 'alcohol', 'intake', '>', '2 units', '/ day', '.'],\n",
2031
              "  'ner_tags': [5, 5, 6, 10, 10, 10, 0]},\n",
2032
              " {'tokens': ['Other',\n",
2033
              "   'severe',\n",
2034
              "   'acute',\n",
2035
              "   'or',\n",
2036
              "   'chronic',\n",
2037
              "   'medical',\n",
2038
              "   'or',\n",
2039
              "   'psychiatric',\n",
2040
              "   'condition',\n",
2041
              "   'or',\n",
2042
              "   'laboratory',\n",
2043
              "   'abnormality',\n",
2044
              "   'which',\n",
2045
              "   'may',\n",
2046
              "   'increase',\n",
2047
              "   'the',\n",
2048
              "   'risk',\n",
2049
              "   'associated',\n",
2050
              "   'with',\n",
2051
              "   'trial',\n",
2052
              "   'participation',\n",
2053
              "   'or',\n",
2054
              "   'investigational',\n",
2055
              "   'product',\n",
2056
              "   'administration',\n",
2057
              "   'or',\n",
2058
              "   'may',\n",
2059
              "   'interfere',\n",
2060
              "   'with',\n",
2061
              "   'the',\n",
2062
              "   'interpretation',\n",
2063
              "   'of',\n",
2064
              "   'study',\n",
2065
              "   'results',\n",
2066
              "   'and',\n",
2067
              "   ',',\n",
2068
              "   'in',\n",
2069
              "   'the',\n",
2070
              "   'judgment',\n",
2071
              "   'of',\n",
2072
              "   'the',\n",
2073
              "   'investigator',\n",
2074
              "   ',',\n",
2075
              "   'would',\n",
2076
              "   'make',\n",
2077
              "   'the',\n",
2078
              "   'participant',\n",
2079
              "   'inappropriate',\n",
2080
              "   'for',\n",
2081
              "   'entry',\n",
2082
              "   'into',\n",
2083
              "   'this',\n",
2084
              "   'trial',\n",
2085
              "   '.'],\n",
2086
              "  'ner_tags': [0,\n",
2087
              "   0,\n",
2088
              "   0,\n",
2089
              "   0,\n",
2090
              "   0,\n",
2091
              "   0,\n",
2092
              "   0,\n",
2093
              "   0,\n",
2094
              "   0,\n",
2095
              "   0,\n",
2096
              "   0,\n",
2097
              "   0,\n",
2098
              "   0,\n",
2099
              "   0,\n",
2100
              "   0,\n",
2101
              "   0,\n",
2102
              "   0,\n",
2103
              "   0,\n",
2104
              "   0,\n",
2105
              "   0,\n",
2106
              "   0,\n",
2107
              "   0,\n",
2108
              "   0,\n",
2109
              "   0,\n",
2110
              "   0,\n",
2111
              "   0,\n",
2112
              "   0,\n",
2113
              "   0,\n",
2114
              "   0,\n",
2115
              "   0,\n",
2116
              "   0,\n",
2117
              "   0,\n",
2118
              "   0,\n",
2119
              "   0,\n",
2120
              "   0,\n",
2121
              "   0,\n",
2122
              "   0,\n",
2123
              "   0,\n",
2124
              "   0,\n",
2125
              "   0,\n",
2126
              "   0,\n",
2127
              "   0,\n",
2128
              "   0,\n",
2129
              "   0,\n",
2130
              "   0,\n",
2131
              "   0,\n",
2132
              "   0,\n",
2133
              "   0,\n",
2134
              "   0,\n",
2135
              "   0,\n",
2136
              "   0,\n",
2137
              "   0,\n",
2138
              "   0,\n",
2139
              "   0]},\n",
2140
              " {'tokens': ['Patients',\n",
2141
              "   'unable',\n",
2142
              "   'or',\n",
2143
              "   'not willing',\n",
2144
              "   'to',\n",
2145
              "   'return',\n",
2146
              "   'for',\n",
2147
              "   'follow ',\n",
2148
              "   '-',\n",
2149
              "   ' ups',\n",
2150
              "   '.'],\n",
2151
              "  'ner_tags': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]},\n",
2152
              " {'tokens': ['Patient',\n",
2153
              "   'with',\n",
2154
              "   '\"',\n",
2155
              "   ' de novo ',\n",
2156
              "   '\"',\n",
2157
              "   'heart',\n",
2158
              "   'Failure',\n",
2159
              "   'and',\n",
2160
              "   'LVEF',\n",
2161
              "   '< =',\n",
2162
              "   '40',\n",
2163
              "   '%',\n",
2164
              "   'admitted',\n",
2165
              "   'in',\n",
2166
              "   'hospital',\n",
2167
              "   ',',\n",
2168
              "   'without',\n",
2169
              "   'contraindications',\n",
2170
              "   'for',\n",
2171
              "   'BB',\n",
2172
              "   'prescription',\n",
2173
              "   'with',\n",
2174
              "   'cardiologist',\n",
2175
              "   'up ',\n",
2176
              "   '-',\n",
2177
              "   ' titration',\n",
2178
              "   'prescription',\n",
2179
              "   'and',\n",
2180
              "   'without',\n",
2181
              "   'having',\n",
2182
              "   'achieved',\n",
2183
              "   'BB',\n",
2184
              "   'target',\n",
2185
              "   'dose',\n",
2186
              "   'previous',\n",
2187
              "   'discharge',\n",
2188
              "   'and',\n",
2189
              "   'signing',\n",
2190
              "   'informed',\n",
2191
              "   'consent',\n",
2192
              "   '.'],\n",
2193
              "  'ner_tags': [0,\n",
2194
              "   0,\n",
2195
              "   0,\n",
2196
              "   0,\n",
2197
              "   0,\n",
2198
              "   1,\n",
2199
              "   2,\n",
2200
              "   0,\n",
2201
              "   0,\n",
2202
              "   0,\n",
2203
              "   0,\n",
2204
              "   0,\n",
2205
              "   0,\n",
2206
              "   0,\n",
2207
              "   0,\n",
2208
              "   0,\n",
2209
              "   0,\n",
2210
              "   0,\n",
2211
              "   0,\n",
2212
              "   0,\n",
2213
              "   0,\n",
2214
              "   0,\n",
2215
              "   0,\n",
2216
              "   0,\n",
2217
              "   0,\n",
2218
              "   0,\n",
2219
              "   0,\n",
2220
              "   0,\n",
2221
              "   0,\n",
2222
              "   0,\n",
2223
              "   0,\n",
2224
              "   0,\n",
2225
              "   0,\n",
2226
              "   0,\n",
2227
              "   0,\n",
2228
              "   0,\n",
2229
              "   0,\n",
2230
              "   0,\n",
2231
              "   0,\n",
2232
              "   0,\n",
2233
              "   0]},\n",
2234
              " {'tokens': ['Blood culture',\n",
2235
              "   '-',\n",
2236
              "   'proven',\n",
2237
              "   'typhoid fever',\n",
2238
              "   '(',\n",
2239
              "   'S',\n",
2240
              "   '.',\n",
2241
              "   ' typhi',\n",
2242
              "   'or',\n",
2243
              "   'S',\n",
2244
              "   '.',\n",
2245
              "   ' paratyphi',\n",
2246
              "   ')'],\n",
2247
              "  'ner_tags': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]},\n",
2248
              " {'tokens': ['Signed',\n",
2249
              "   'informed',\n",
2250
              "   'consent',\n",
2251
              "   'to',\n",
2252
              "   'participate',\n",
2253
              "   'in',\n",
2254
              "   'the',\n",
2255
              "   'study',\n",
2256
              "   '.'],\n",
2257
              "  'ner_tags': [0, 0, 0, 0, 0, 0, 0, 0, 0]},\n",
2258
              " {'tokens': ['uterine', 'size', '< 12 weeks'], 'ner_tags': [0, 0, 0]},\n",
2259
              " {'tokens': ['presence',\n",
2260
              "   'of',\n",
2261
              "   'benign',\n",
2262
              "   'cause',\n",
2263
              "   'for',\n",
2264
              "   'the',\n",
2265
              "   'hysterectomy',\n",
2266
              "   'e',\n",
2267
              "   '.',\n",
2268
              "   'g',\n",
2269
              "   '.',\n",
2270
              "   'fibroid',\n",
2271
              "   'uterus',\n",
2272
              "   ',',\n",
2273
              "   'perimenopausal',\n",
2274
              "   'beeding',\n",
2275
              "   'not',\n",
2276
              "   'responding',\n",
2277
              "   'to',\n",
2278
              "   'medical',\n",
2279
              "   'treatment',\n",
2280
              "   'or',\n",
2281
              "   'complex',\n",
2282
              "   'endometrial',\n",
2283
              "   'hyperplasia',\n",
2284
              "   'without',\n",
2285
              "   'atypia',\n",
2286
              "   '.'],\n",
2287
              "  'ner_tags': [0,\n",
2288
              "   0,\n",
2289
              "   0,\n",
2290
              "   0,\n",
2291
              "   0,\n",
2292
              "   0,\n",
2293
              "   0,\n",
2294
              "   0,\n",
2295
              "   0,\n",
2296
              "   0,\n",
2297
              "   0,\n",
2298
              "   0,\n",
2299
              "   0,\n",
2300
              "   0,\n",
2301
              "   0,\n",
2302
              "   0,\n",
2303
              "   0,\n",
2304
              "   0,\n",
2305
              "   0,\n",
2306
              "   0,\n",
2307
              "   0,\n",
2308
              "   0,\n",
2309
              "   0,\n",
2310
              "   0,\n",
2311
              "   0,\n",
2312
              "   0,\n",
2313
              "   0,\n",
2314
              "   0]},\n",
2315
              " {'tokens': ['Absence',\n",
2316
              "   'of',\n",
2317
              "   'significant',\n",
2318
              "   'scarring',\n",
2319
              "   'in',\n",
2320
              "   'the',\n",
2321
              "   'pelvis',\n",
2322
              "   'from',\n",
2323
              "   'previous',\n",
2324
              "   'surgeries',\n",
2325
              "   '.'],\n",
2326
              "  'ner_tags': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]},\n",
2327
              " {'tokens': ['Patients', '=', '18 years', 'of age', 'or', 'older'],\n",
2328
              "  'ner_tags': [0, 0, 0, 0, 0, 0]},\n",
2329
              " {'tokens': ['Subjects',\n",
2330
              "   'must',\n",
2331
              "   'be',\n",
2332
              "   'able',\n",
2333
              "   'and',\n",
2334
              "   'willing',\n",
2335
              "   'to',\n",
2336
              "   'give',\n",
2337
              "   'written',\n",
2338
              "   'informed',\n",
2339
              "   'consent',\n",
2340
              "   'and',\n",
2341
              "   'to',\n",
2342
              "   'comply',\n",
2343
              "   'with',\n",
2344
              "   'the',\n",
2345
              "   'requirements',\n",
2346
              "   'of',\n",
2347
              "   'this',\n",
2348
              "   'study',\n",
2349
              "   'protocol'],\n",
2350
              "  'ner_tags': [15,\n",
2351
              "   0,\n",
2352
              "   0,\n",
2353
              "   0,\n",
2354
              "   0,\n",
2355
              "   0,\n",
2356
              "   0,\n",
2357
              "   15,\n",
2358
              "   15,\n",
2359
              "   15,\n",
2360
              "   15,\n",
2361
              "   15,\n",
2362
              "   15,\n",
2363
              "   15,\n",
2364
              "   15,\n",
2365
              "   15,\n",
2366
              "   15,\n",
2367
              "   15,\n",
2368
              "   15,\n",
2369
              "   15,\n",
2370
              "   15]},\n",
2371
              " {'tokens': ['Established',\n",
2372
              "   'diagnosis',\n",
2373
              "   'of',\n",
2374
              "   'UC',\n",
2375
              "   'and',\n",
2376
              "   'moderate ',\n",
2377
              "   '-',\n",
2378
              "   ' to ',\n",
2379
              "   '-',\n",
2380
              "   ' severe',\n",
2381
              "   'disease',\n",
2382
              "   'activity',\n",
2383
              "   ',',\n",
2384
              "   'defined',\n",
2385
              "   'as',\n",
2386
              "   'a',\n",
2387
              "   'Mayo',\n",
2388
              "   'score',\n",
2389
              "   'of',\n",
2390
              "   '6 ',\n",
2391
              "   '-',\n",
2392
              "   ' 12',\n",
2393
              "   ',',\n",
2394
              "   'with',\n",
2395
              "   'an',\n",
2396
              "   'endoscopic',\n",
2397
              "   'subscore',\n",
2398
              "   '=',\n",
2399
              "   '2',\n",
2400
              "   '.'],\n",
2401
              "  'ner_tags': [1,\n",
2402
              "   0,\n",
2403
              "   0,\n",
2404
              "   2,\n",
2405
              "   0,\n",
2406
              "   1,\n",
2407
              "   2,\n",
2408
              "   1,\n",
2409
              "   2,\n",
2410
              "   2,\n",
2411
              "   0,\n",
2412
              "   0,\n",
2413
              "   0,\n",
2414
              "   0,\n",
2415
              "   0,\n",
2416
              "   9,\n",
2417
              "   9,\n",
2418
              "   0,\n",
2419
              "   0,\n",
2420
              "   0,\n",
2421
              "   0,\n",
2422
              "   0,\n",
2423
              "   0,\n",
2424
              "   0,\n",
2425
              "   0,\n",
2426
              "   9,\n",
2427
              "   0,\n",
2428
              "   0,\n",
2429
              "   0,\n",
2430
              "   0]},\n",
2431
              " {'tokens': ['Patients',\n",
2432
              "   'had',\n",
2433
              "   'an',\n",
2434
              "   'inadequate',\n",
2435
              "   'response',\n",
2436
              "   'to',\n",
2437
              "   ',',\n",
2438
              "   'or',\n",
2439
              "   'had',\n",
2440
              "   'failed',\n",
2441
              "   'to',\n",
2442
              "   'tolerate',\n",
2443
              "   ',',\n",
2444
              "   '1',\n",
2445
              "   'or',\n",
2446
              "   'more',\n",
2447
              "   'of',\n",
2448
              "   'the',\n",
2449
              "   'following',\n",
2450
              "   'conventional',\n",
2451
              "   'therapies',\n",
2452
              "   ':',\n",
2453
              "   'oral',\n",
2454
              "   '5 ',\n",
2455
              "   '-',\n",
2456
              "   ' aminosalicylates',\n",
2457
              "   ',',\n",
2458
              "   'oral',\n",
2459
              "   'corticosteroids',\n",
2460
              "   ',',\n",
2461
              "   'azathioprine ',\n",
2462
              "   '(',\n",
2463
              "   ' AZA ',\n",
2464
              "   ')',\n",
2465
              "   ',',\n",
2466
              "   'and/or',\n",
2467
              "   '6 ',\n",
2468
              "   '-',\n",
2469
              "   ' mercaptopurine ',\n",
2470
              "   '(',\n",
2471
              "   ' 6MP ',\n",
2472
              "   ')',\n",
2473
              "   ';',\n",
2474
              "   'or',\n",
2475
              "   'corticosteroid dependent ',\n",
2476
              "   '(',\n",
2477
              "   ' ie',\n",
2478
              "   ',',\n",
2479
              "   ' an inability to taper corticosteroids without recurrence of UC symptoms ',\n",
2480
              "   ')',\n",
2481
              "   '.'],\n",
2482
              "  'ner_tags': [0,\n",
2483
              "   0,\n",
2484
              "   0,\n",
2485
              "   0,\n",
2486
              "   0,\n",
2487
              "   0,\n",
2488
              "   0,\n",
2489
              "   0,\n",
2490
              "   0,\n",
2491
              "   0,\n",
2492
              "   0,\n",
2493
              "   0,\n",
2494
              "   0,\n",
2495
              "   0,\n",
2496
              "   0,\n",
2497
              "   0,\n",
2498
              "   0,\n",
2499
              "   0,\n",
2500
              "   0,\n",
2501
              "   0,\n",
2502
              "   0,\n",
2503
              "   0,\n",
2504
              "   0,\n",
2505
              "   0,\n",
2506
              "   0,\n",
2507
              "   0,\n",
2508
              "   0,\n",
2509
              "   0,\n",
2510
              "   0,\n",
2511
              "   0,\n",
2512
              "   0,\n",
2513
              "   0,\n",
2514
              "   0,\n",
2515
              "   0,\n",
2516
              "   0,\n",
2517
              "   0,\n",
2518
              "   0,\n",
2519
              "   0,\n",
2520
              "   0,\n",
2521
              "   0,\n",
2522
              "   0,\n",
2523
              "   0,\n",
2524
              "   0,\n",
2525
              "   0,\n",
2526
              "   0,\n",
2527
              "   0,\n",
2528
              "   0,\n",
2529
              "   0,\n",
2530
              "   0,\n",
2531
              "   0,\n",
2532
              "   0]},\n",
2533
              " {'tokens': ['Patients',\n",
2534
              "   'concurrently',\n",
2535
              "   'treated',\n",
2536
              "   'with',\n",
2537
              "   'oral',\n",
2538
              "   '5 ',\n",
2539
              "   '-',\n",
2540
              "   ' aminosalicylates',\n",
2541
              "   'or',\n",
2542
              "   'corticosteroids',\n",
2543
              "   'were',\n",
2544
              "   'to receive',\n",
2545
              "   'a stable',\n",
2546
              "   'dose',\n",
2547
              "   'for',\n",
2548
              "   'at',\n",
2549
              "   'least',\n",
2550
              "   '2 weeks',\n",
2551
              "   'before',\n",
2552
              "   'baseline',\n",
2553
              "   ',',\n",
2554
              "   'and',\n",
2555
              "   'patients',\n",
2556
              "   'receiving',\n",
2557
              "   'AZA',\n",
2558
              "   'and/or',\n",
2559
              "   '6MP',\n",
2560
              "   'were',\n",
2561
              "   'to receive',\n",
2562
              "   'a stable',\n",
2563
              "   'dose',\n",
2564
              "   'for',\n",
2565
              "   'at',\n",
2566
              "   'least',\n",
2567
              "   '4 weeks',\n",
2568
              "   'before',\n",
2569
              "   'baseline',\n",
2570
              "   '.',\n",
2571
              "   'Patients',\n",
2572
              "   'were',\n",
2573
              "   'required',\n",
2574
              "   'to maintain',\n",
2575
              "   'stable',\n",
2576
              "   'doses',\n",
2577
              "   'of',\n",
2578
              "   'their',\n",
2579
              "   'concomitant',\n",
2580
              "   'UC',\n",
2581
              "   'medications',\n",
2582
              "   'during',\n",
2583
              "   'the',\n",
2584
              "   'study'],\n",
2585
              "  'ner_tags': [0,\n",
2586
              "   5,\n",
2587
              "   0,\n",
2588
              "   0,\n",
2589
              "   0,\n",
2590
              "   0,\n",
2591
              "   0,\n",
2592
              "   0,\n",
2593
              "   0,\n",
2594
              "   0,\n",
2595
              "   0,\n",
2596
              "   0,\n",
2597
              "   0,\n",
2598
              "   0,\n",
2599
              "   0,\n",
2600
              "   0,\n",
2601
              "   0,\n",
2602
              "   0,\n",
2603
              "   0,\n",
2604
              "   0,\n",
2605
              "   0,\n",
2606
              "   0,\n",
2607
              "   0,\n",
2608
              "   0,\n",
2609
              "   0,\n",
2610
              "   0,\n",
2611
              "   0,\n",
2612
              "   0,\n",
2613
              "   0,\n",
2614
              "   0,\n",
2615
              "   0,\n",
2616
              "   0,\n",
2617
              "   0,\n",
2618
              "   0,\n",
2619
              "   0,\n",
2620
              "   0,\n",
2621
              "   0,\n",
2622
              "   0,\n",
2623
              "   0,\n",
2624
              "   0,\n",
2625
              "   0,\n",
2626
              "   0,\n",
2627
              "   0,\n",
2628
              "   0,\n",
2629
              "   0,\n",
2630
              "   0,\n",
2631
              "   0,\n",
2632
              "   0,\n",
2633
              "   0,\n",
2634
              "   0,\n",
2635
              "   0,\n",
2636
              "   0]},\n",
2637
              " {'tokens': ['Female',\n",
2638
              "   'subjects',\n",
2639
              "   'of',\n",
2640
              "   'child',\n",
2641
              "   'bearing',\n",
2642
              "   'potential',\n",
2643
              "   'must',\n",
2644
              "   'be',\n",
2645
              "   'willing',\n",
2646
              "   'to',\n",
2647
              "   'ensure',\n",
2648
              "   'that',\n",
2649
              "   'they',\n",
2650
              "   'or',\n",
2651
              "   'their',\n",
2652
              "   'partner',\n",
2653
              "   'use',\n",
2654
              "   'effective',\n",
2655
              "   'contraception',\n",
2656
              "   'during',\n",
2657
              "   'the',\n",
2658
              "   'study',\n",
2659
              "   'and',\n",
2660
              "   'for',\n",
2661
              "   'OR',\n",
2662
              "   'be',\n",
2663
              "   'surgically',\n",
2664
              "   'sterile',\n",
2665
              "   'or',\n",
2666
              "   'postmenopausal',\n",
2667
              "   '.'],\n",
2668
              "  'ner_tags': [15,\n",
2669
              "   0,\n",
2670
              "   0,\n",
2671
              "   1,\n",
2672
              "   0,\n",
2673
              "   0,\n",
2674
              "   0,\n",
2675
              "   0,\n",
2676
              "   0,\n",
2677
              "   0,\n",
2678
              "   0,\n",
2679
              "   0,\n",
2680
              "   0,\n",
2681
              "   0,\n",
2682
              "   0,\n",
2683
              "   0,\n",
2684
              "   0,\n",
2685
              "   0,\n",
2686
              "   0,\n",
2687
              "   0,\n",
2688
              "   0,\n",
2689
              "   0,\n",
2690
              "   0,\n",
2691
              "   0,\n",
2692
              "   0,\n",
2693
              "   0,\n",
2694
              "   0,\n",
2695
              "   0,\n",
2696
              "   0,\n",
2697
              "   0,\n",
2698
              "   0]},\n",
2699
              " {'tokens': ['Surgical',\n",
2700
              "   'sterilized',\n",
2701
              "   'female',\n",
2702
              "   'patients',\n",
2703
              "   'with',\n",
2704
              "   'documentation',\n",
2705
              "   'of',\n",
2706
              "   'prior',\n",
2707
              "   'hysterectomy',\n",
2708
              "   ',',\n",
2709
              "   'tubal',\n",
2710
              "   'ligation',\n",
2711
              "   'or',\n",
2712
              "   'patients',\n",
2713
              "   'with',\n",
2714
              "   'a',\n",
2715
              "   'history',\n",
2716
              "   'of',\n",
2717
              "   'bilateral',\n",
2718
              "   'oophorectomy',\n",
2719
              "   'AND',\n",
2720
              "   'patients',\n",
2721
              "   'with',\n",
2722
              "   'a',\n",
2723
              "   'history',\n",
2724
              "   'of',\n",
2725
              "   'bilateral',\n",
2726
              "   'oophorectomy',\n",
2727
              "   'AND',\n",
2728
              "   'hysterectomy'],\n",
2729
              "  'ner_tags': [0,\n",
2730
              "   0,\n",
2731
              "   0,\n",
2732
              "   0,\n",
2733
              "   0,\n",
2734
              "   0,\n",
2735
              "   0,\n",
2736
              "   0,\n",
2737
              "   0,\n",
2738
              "   0,\n",
2739
              "   0,\n",
2740
              "   0,\n",
2741
              "   0,\n",
2742
              "   0,\n",
2743
              "   0,\n",
2744
              "   0,\n",
2745
              "   0,\n",
2746
              "   0,\n",
2747
              "   0,\n",
2748
              "   0,\n",
2749
              "   0,\n",
2750
              "   0,\n",
2751
              "   0,\n",
2752
              "   0,\n",
2753
              "   0,\n",
2754
              "   0,\n",
2755
              "   0,\n",
2756
              "   0,\n",
2757
              "   0,\n",
2758
              "   0]},\n",
2759
              " {'tokens': ['Postmenopausal',\n",
2760
              "   'women',\n",
2761
              "   'with',\n",
2762
              "   'postmenopausal',\n",
2763
              "   'defined',\n",
2764
              "   'as',\n",
2765
              "   'permanent',\n",
2766
              "   'cessation',\n",
2767
              "   '>',\n",
2768
              "   '1',\n",
2769
              "   'year',\n",
2770
              "   'of',\n",
2771
              "   'previously',\n",
2772
              "   'occurring',\n",
2773
              "   'menses',\n",
2774
              "   '.'],\n",
2775
              "  'ner_tags': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]},\n",
2776
              " {'tokens': ['Female subjects',\n",
2777
              "   'serum pregnancy test performed at the screening visit and',\n",
2778
              "   'urine pregnancy test performed at the baseline visit must be negative',\n",
2779
              "   '.'],\n",
2780
              "  'ner_tags': [0, 0, 0, 0]},\n",
2781
              " {'tokens': ['Subjects',\n",
2782
              "   'have',\n",
2783
              "   'following',\n",
2784
              "   'investigations',\n",
2785
              "   'within',\n",
2786
              "   '1 month',\n",
2787
              "   'prior',\n",
2788
              "   'to',\n",
2789
              "   'enrolment',\n",
2790
              "   '.'],\n",
2791
              "  'ner_tags': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}]"
2792
            ]
2793
          },
2794
          "metadata": {},
2795
          "execution_count": 74
2796
        }
2797
      ]
2798
    },
2799
    {
2800
      "cell_type": "code",
2801
      "execution_count": 75,
2802
      "metadata": {
2803
        "id": "Sro66gmXWJJ3"
2804
      },
2805
      "outputs": [],
2806
      "source": [
2807
        "# Build a DataFrame from the prompt-1 annotations, then convert it to a Dataset.\n",
        "p1_df = pd.DataFrame(p1_annotations)\n",
        "p1_dataset = Dataset.from_pandas(p1_df)"
2809
      ]
2810
    },
2811
    {
2812
      "cell_type": "code",
2813
      "execution_count": 76,
2814
      "metadata": {
2815
        "colab": {
2816
          "base_uri": "https://localhost:8080/",
2817
          "height": 49,
2818
          "referenced_widgets": [
2819
            "c070b80f8b404d7aa1945ce74f04e669",
2820
            "efd95411b98444c39eacc1b3ba12cf9e",
2821
            "f3b4b928a53444cf9133d950c79be90a",
2822
            "8ce9a6d482554cff8218b795a2b7afe5",
2823
            "4410cb65ce684026bac9d6bce8204641",
2824
            "bbff96b8aab24b1dacce61988d890bde",
2825
            "9b46df12fda94d0193f51b1517316468",
2826
            "2a1d2c6b26384a14903f1a186de1f32f",
2827
            "0458e8b753dd4412bda534234c52e48f",
2828
            "0ed9463a4fc74d1daf25e7273d2025da",
2829
            "6f26495add4a4869abf3b830919feeb0"
2830
          ]
2831
        },
2832
        "id": "y4qfjZXFWJJ3",
2833
        "outputId": "dc334ebb-bd42-4f48-dd12-852a9d7b801d"
2834
      },
2835
      "outputs": [
2836
        {
2837
          "output_type": "display_data",
2838
          "data": {
2839
            "text/plain": [
2840
              "Map:   0%|          | 0/50 [00:00<?, ? examples/s]"
2841
            ],
2842
            "application/vnd.jupyter.widget-view+json": {
2843
              "version_major": 2,
2844
              "version_minor": 0,
2845
              "model_id": "c070b80f8b404d7aa1945ce74f04e669"
2846
            }
2847
          },
2848
          "metadata": {}
2849
        }
2850
      ],
2851
      "source": [
2852
        "# Apply tokenize_and_align_labels (defined elsewhere in this notebook) to the dataset in batched mode.\n",
        "p1_dataset = p1_dataset.map(tokenize_and_align_labels, batched=True)"
2853
      ]
2854
    },
2855
    {
2856
      "cell_type": "code",
2857
      "execution_count": 78,
2858
      "metadata": {
2859
        "id": "UYmC5TRGWJJ3"
2860
      },
2861
      "outputs": [],
2862
      "source": [
2863
        "# Keep only the sentences whose label sequence has the same length in the\n",
        "# prompt-1 dataset and in the gold-annotation dataset.\n",
        "# Each 'labels' column is read once up front: indexing the dataset by column\n",
        "# name inside the loop may re-read the whole column on every access.\n",
        "p1_label_rows = p1_dataset['labels']\n",
        "true_label_rows = true_ann_dataset['labels']\n",
        "\n",
        "sentences_to_evaluate_p1 = []\n",
        "sentences_to_evaluate_true = []\n",
        "\n",
        "# rows are paired by position\n",
        "for p1_row, true_row in zip(p1_label_rows, true_label_rows):\n",
        "    if len(p1_row) == len(true_row):\n",
        "        sentences_to_evaluate_p1.append(p1_row)\n",
        "        sentences_to_evaluate_true.append(true_row)"
2871
      ]
2872
    },
2873
    {
2874
      "cell_type": "code",
2875
      "execution_count": 79,
2876
      "metadata": {
2877
        "colab": {
2878
          "base_uri": "https://localhost:8080/"
2879
        },
2880
        "id": "4iluystIWJJ3",
2881
        "outputId": "0d3c4a3a-c7da-4912-9975-3056a4d7f2cb"
2882
      },
2883
      "outputs": [
2884
        {
2885
          "output_type": "stream",
2886
          "name": "stdout",
2887
          "text": [
2888
            "0.72\n"
2889
          ]
2890
        }
2891
      ],
2892
      "source": [
2893
        "# Fraction of the prompt-1 sentences that were kept for evaluation.\n",
        "print(len(sentences_to_evaluate_p1)/len(p1_dataset))"
2894
      ]
2895
    },
2896
    {
2897
      "cell_type": "code",
2898
      "execution_count": 38,
2899
      "metadata": {
2900
        "id": "QoWw7lVoWJJ3"
2901
      },
2902
      "outputs": [],
2903
      "source": [
2904
        "def get_labels(p):\n",
        "    \"\"\"Convert label ids to entity names, skipping ignored positions.\n",
        "\n",
        "    `p` is a `(predictions, labels)` pair of parallel lists of label-id\n",
        "    sequences. Every position whose gold label is -100 is dropped from both\n",
        "    outputs; the remaining ids are looked up in `entities_list`.\n",
        "\n",
        "    Returns `(predictions, labels)` as lists of lists of `entities_list`\n",
        "    entries; the two outputs have the same length sentence by sentence.\n",
        "    \"\"\"\n",
        "    predictions, labels = p\n",
        "    # Both outputs are built from the ORIGINAL id sequences. Rebinding\n",
        "    # `predictions` to the filtered lists first would zip the shortened\n",
        "    # predictions against the full labels and silently drop gold labels.\n",
        "    # Remove ignored index (special tokens)\n",
        "    true_predictions = [\n",
        "        [entities_list[pred_id] for (pred_id, label_id) in zip(prediction, label) if label_id != -100]\n",
        "        for prediction, label in zip(predictions, labels)\n",
        "    ]\n",
        "    true_labels = [\n",
        "        [entities_list[label_id] for (_, label_id) in zip(prediction, label) if label_id != -100]\n",
        "        for prediction, label in zip(predictions, labels)\n",
        "    ]\n",
        "\n",
        "    return true_predictions, true_labels"
2917
      ]
2918
    },
2919
    {
2920
      "cell_type": "code",
2921
      "execution_count": 81,
2922
      "metadata": {
2923
        "id": "yUE1LpxfWJJ3"
2924
      },
2925
      "outputs": [],
2926
      "source": [
2927
        "# Convert the kept label-id sequences (prompt 1 vs. gold) with get_labels.\n",
        "pred_labels, true_labels = get_labels((sentences_to_evaluate_p1, sentences_to_evaluate_true))"
2928
      ]
2929
    },
2930
    {
2931
      "cell_type": "code",
2932
      "execution_count": 83,
2933
      "metadata": {
2934
        "id": "vZhKtEs0WJJ3"
2935
      },
2936
      "outputs": [],
2937
      "source": [
2938
        "from eval_file import *"
2939
      ]
2940
    },
2941
    {
2942
      "cell_type": "code",
2943
      "source": [
2944
        "# from eval_file import *\n",
2945
        "\n",
2946
        "import argparse\n",
2947
        "from collections import defaultdict\n",
2948
        "from itertools import chain\n",
2949
        "from math import pow\n",
2950
        "from pathlib import Path\n",
2951
        "\n",
2952
        "# from common_utils.common_io import load_bio_file_into_sents\n",
2953
        "# from common_utils.common_log import create_logger\n",
2954
        "# -*- coding: utf-8 -*-\n",
2955
        "\n",
2956
        "# -*- coding: utf-8 -*-\n",
2957
        "\n",
2958
        "import json\n",
2959
        "import pickle as pkl\n",
2960
        "\n",
2961
        "\n",
2962
        "def read_from_file(ifn):\n",
        "    \"\"\"Return the full content of the text file `ifn` as a single string.\"\"\"\n",
        "    with open(ifn, \"r\") as src:\n",
        "        return src.read()\n",
2966
        "\n",
2967
        "\n",
2968
        "def write_to_file(text, ofn):\n",
        "    \"\"\"Write `text` to the file `ofn` (opened in \"w\" mode) and return True.\"\"\"\n",
        "    with open(ofn, \"w\") as sink:\n",
        "        sink.write(text)\n",
        "    return True\n",
2972
        "\n",
2973
        "\n",
2974
        "def pkl_load(ifn):\n",
        "    \"\"\"Unpickle and return the object stored in the binary file `ifn`.\"\"\"\n",
        "    # NOTE: unpickling can execute arbitrary code; only load trusted files.\n",
        "    with open(ifn, \"rb\") as src:\n",
        "        return pkl.load(src)\n",
2978
        "\n",
2979
        "\n",
2980
        "def pkl_dump(pdata, ofn):\n",
        "    \"\"\"Pickle `pdata` into the binary file `ofn` and return True.\"\"\"\n",
        "    with open(ofn, \"wb\") as sink:\n",
        "        pkl.dump(pdata, sink)\n",
        "    return True\n",
2984
        "\n",
2985
        "\n",
2986
        "def json_load(ifn):\n",
        "    \"\"\"Parse the JSON text file `ifn` and return the decoded object.\"\"\"\n",
        "    with open(ifn, \"r\") as src:\n",
        "        return json.load(src)\n",
2990
        "\n",
2991
        "\n",
2992
        "def json_dump(jdata, ofn):\n",
        "    \"\"\"Serialize `jdata` as JSON into the text file `ofn` and return True.\"\"\"\n",
        "    with open(ofn, \"w\") as sink:\n",
        "        json.dump(jdata, sink)\n",
        "    return True\n",
2996
        "\n",
2997
        "\n",
2998
        "def load_bio_file_into_sents(bio_file, word_sep=\" \", do_lower=False):\n",
        "    \"\"\"Read a BIO file into a list of sentences.\n",
        "\n",
        "    The stripped file text is split into sentences on blank lines (\"\\n\\n\"),\n",
        "    each sentence into lines, and each line into fields on `word_sep`.\n",
        "    With `do_lower` the whole text is lower-cased before splitting.\n",
        "\n",
        "    Returns a list (sentences) of lists (lines) of lists (fields).\n",
        "    \"\"\"\n",
        "    content = read_from_file(bio_file).strip()\n",
        "    if do_lower:\n",
        "        content = content.lower()\n",
        "\n",
        "    return [\n",
        "        [line.split(word_sep) for line in block.split(\"\\n\")]\n",
        "        for block in content.split(\"\\n\\n\")\n",
        "    ]\n",
3016
        "\n",
3017
        "\n",
3018
        "def output_bio(bio_data, output_file, sep=\" \"):\n",
        "    \"\"\"Write sentences to `output_file`, one word per line.\n",
        "\n",
        "    Each word is a sequence of string fields joined with `sep`; every\n",
        "    sentence is followed by one empty line.\n",
        "    \"\"\"\n",
        "    with open(output_file, \"w\") as out:\n",
        "        for sent in bio_data:\n",
        "            out.writelines(sep.join(word) + \"\\n\" for word in sent)\n",
        "            out.write(\"\\n\")\n",
3026
        "\n",
3027
        "\n",
3028
        "class PRF:\n",
        "    \"\"\"Two counters, `true` and `false`, incremented one case at a time.\"\"\"\n",
        "\n",
        "    def __init__(self):\n",
        "        self.true, self.false = 0, 0\n",
        "\n",
        "    def add_true_case(self):\n",
        "        \"\"\"Count one more true case.\"\"\"\n",
        "        self.true = self.true + 1\n",
        "\n",
        "    def add_false_case(self):\n",
        "        \"\"\"Count one more false case.\"\"\"\n",
        "        self.false = self.false + 1\n",
        "\n",
        "    def get_true_false_counts(self):\n",
        "        \"\"\"Return the counters as a (true, false) tuple.\"\"\"\n",
        "        return (self.true, self.false)\n",
        "\n",
        "    def __str__(self):\n",
        "        # string form of the instance attribute dict\n",
        "        return str(vars(self))\n",
3044
        "\n",
3045
        "\n",
3046
        "class BioEval:\n",
3047
        "    def __init__(self):\n",
        "        \"\"\"Create an evaluator with empty counters, beta = 1 and 'o' excluded from evaluation.\"\"\"\n",
        "        # labels that never start a span, and the weight used for the F-score\n",
        "        self.label_not_for_eval = {'o'}\n",
        "        self.beta = 1\n",
        "        # token-level accuracy\n",
        "        self.acc = PRF()\n",
        "        # prediction outcomes: overall and per category, strict and relaxed\n",
        "        self.all_strict, self.all_relax = PRF(), PRF()\n",
        "        self.cat_strict, self.cat_relax = defaultdict(PRF), defaultdict(PRF)\n",
        "        # gold standard entity counts: overall and per category\n",
        "        self.gs_all = 0\n",
        "        self.gs_cat = defaultdict(int)\n",
        "        # summaries filled in by the measuring methods\n",
        "        self.performance, self.counts = dict(), dict()\n",
3061
        "\n",
3062
        "    def reset(self):\n",
        "        \"\"\"Clear every counter and summary; beta and the excluded labels are left untouched.\"\"\"\n",
        "        # token-level accuracy\n",
        "        self.acc = PRF()\n",
        "        # prediction outcomes: overall and per category, strict and relaxed\n",
        "        self.all_strict, self.all_relax = PRF(), PRF()\n",
        "        self.cat_strict, self.cat_relax = defaultdict(PRF), defaultdict(PRF)\n",
        "        # gold standard entity counts: overall and per category\n",
        "        self.gs_all = 0\n",
        "        self.gs_cat = defaultdict(int)\n",
        "        # summaries filled in by the measuring methods\n",
        "        self.performance, self.counts = dict(), dict()\n",
3072
        "\n",
3073
        "    def set_beta_for_f_score(self, beta):\n",
        "        \"\"\"Store `beta` as the weight used for the F-score and report it on stdout.\"\"\"\n",
        "        print(f\"Using beta={beta} for calculating F-score\")\n",
        "        self.beta = beta\n",
3076
        "\n",
3077
        "    # def set_logger(self, logger):\n",
3078
        "    #     self.logger = logger\n",
3079
        "\n",
3080
        "    def add_labels_not_for_eval(self, *labels):\n",
        "        \"\"\"Add `labels`, lower-cased, to the set of labels excluded from evaluation.\"\"\"\n",
        "        self.label_not_for_eval.update(label.lower() for label in labels)\n",
3083
        "\n",
3084
        "    def __calc_prf(self, tp, fp, tp_tn):\n",
        "        \"\"\"\n",
        "        Compute precision, recall and the F-beta score from raw counts.\n",
        "\n",
        "        Precision is tp / (tp + fp); recall is tp / tp_tn (the callers in this class\n",
        "        pass a gold-standard entity count as tp_tn). A ratio whose denominator is 0\n",
        "        is reported as 0.0. beta=1 gives the F1 score, beta=2 favors recall and\n",
        "        beta=0.5 favors precision; use set_beta_for_f_score to change beta.\n",
        "\n",
        "        Returns the tuple (precision, recall, f_beta).\n",
        "        \"\"\"\n",
        "        predicted = tp + fp\n",
        "        pre = 0.0\n",
        "        if predicted > 0:\n",
        "            pre = 1.0 * tp / predicted\n",
        "        rec = 0.0\n",
        "        if tp_tn > 0:\n",
        "            rec = 1.0 * tp / tp_tn\n",
        "        beta2 = pow(self.beta, 2)\n",
        "        if (pre + rec) > 0:\n",
        "            f_beta = (1 + beta2) * pre * rec / (beta2 * pre + rec)\n",
        "        else:\n",
        "            f_beta = 0.0\n",
        "        return pre, rec, f_beta\n",
3095
        "\n",
3096
        "    def __measure_performance(self):\n",
        "        \"\"\"Fill self.performance from the accumulated counters.\n",
        "\n",
        "        self.performance['overall'] holds 'acc' (rounded to 4 decimals, 0.0 when no\n",
        "        token was counted) plus 'strict' and 'relax' entries;\n",
        "        self.performance['category'] holds 'strict' and 'relax' dicts keyed by\n",
        "        category. Every entry is a dict with 'precision', 'recall' and 'f_score'.\n",
        "        \"\"\"\n",
        "        def prf_entry(counter, gold_count):\n",
        "            # precision / recall / F-score of one counter against a gold count\n",
        "            true_count, false_count = counter.get_true_false_counts()\n",
        "            p, r, f = self.__calc_prf(true_count, false_count, gold_count)\n",
        "            return {'precision': p, 'recall': r, 'f_score': f}\n",
        "\n",
        "        # calc acc\n",
        "        acc_true_num, acc_false_num = self.acc.get_true_false_counts()\n",
        "        total_acc_num = acc_true_num + acc_false_num\n",
        "        if total_acc_num > 0:\n",
        "            overall_acc = round(1.0 * acc_true_num / total_acc_num, 4)\n",
        "        else:\n",
        "            overall_acc = 0.0\n",
        "\n",
        "        self.performance['overall'] = {\n",
        "            'acc': overall_acc,\n",
        "            'strict': prf_entry(self.all_strict, self.gs_all),\n",
        "            'relax': prf_entry(self.all_relax, self.gs_all),\n",
        "        }\n",
        "\n",
        "        # per category, measured against that category's gold count\n",
        "        self.performance['category'] = {\n",
        "            'strict': {k: prf_entry(v, self.gs_cat[k]) for k, v in self.cat_strict.items()},\n",
        "            'relax': {k: prf_entry(v, self.gs_cat[k]) for k, v in self.cat_relax.items()},\n",
        "        }\n",
3137
        "\n",
3138
        "    def __measure_counts(self):\n",
        "        \"\"\"Fill self.counts with gold-standard and prediction counts.\n",
        "\n",
        "        self.counts['expect'] maps 'overall' and every gold category to its entity\n",
        "        count. self.counts['prediction'] has a 'strict' and a 'relax' dict; each\n",
        "        maps 'overall' and every predicted category to a dict with 'total', 'true'\n",
        "        and 'false'.\n",
        "        \"\"\"\n",
        "        def tally(counter):\n",
        "            hits, misses = counter.get_true_false_counts()\n",
        "            return {'total': hits + misses, 'true': hits, 'false': misses}\n",
        "\n",
        "        # gold standard\n",
        "        expect = {'overall': self.gs_all}\n",
        "        expect.update(self.gs_cat)\n",
        "        self.counts['expect'] = expect\n",
        "\n",
        "        # prediction: strict first, then relax\n",
        "        prediction = {'strict': dict(), 'relax': dict()}\n",
        "        self.counts['prediction'] = prediction\n",
        "        for mode, overall, per_cat in (\n",
        "            ('strict', self.all_strict, self.cat_strict),\n",
        "            ('relax', self.all_relax, self.cat_relax),\n",
        "        ):\n",
        "            prediction[mode]['overall'] = tally(overall)\n",
        "            for cate, counter in per_cat.items():\n",
        "                prediction[mode][cate] = tally(counter)\n",
3170
        "\n",
3171
        "    @staticmethod\n",
        "    def __strict_match(gs, pred, s_idx, e_idx, en_type):\n",
        "        \"\"\"Return True when the predicted span [s_idx, e_idx) matches the gold tags exactly.\n",
        "\n",
        "        The gold entity must not continue at e_idx, both sequences must carry\n",
        "        the b- tag of `en_type` at s_idx, and every tag inside the span must be\n",
        "        identical in `gs` and `pred`.\n",
        "        \"\"\"\n",
        "        begin_tag = f\"b-{en_type}\"\n",
        "        inside_tag = f\"i-{en_type}\"\n",
        "        # the gold entity goes on after the predicted end\n",
        "        if e_idx < len(gs) and gs[e_idx] == inside_tag:\n",
        "            return False\n",
        "        # force first token to be B- on both sides\n",
        "        if not (gs[s_idx] == begin_tag and pred[s_idx] == begin_tag):\n",
        "            return False\n",
        "        # every token in the span has to agree\n",
        "        return all(gs[idx] == pred[idx] for idx in range(s_idx, e_idx))\n",
3184
        "\n",
3185
        "    @staticmethod\n",
        "    def __relax_match(gs, pred, s_idx, e_idx, en_type):\n",
        "        \"\"\"Return True when any position in [s_idx, e_idx) carries `en_type` in both sequences.\n",
        "\n",
        "        This is a partial-match strategy, much looser than right-left or\n",
        "        approximate matching: one shared position of the right category is enough.\n",
        "        Each predicted tag in the span must split on \"-\" into exactly two parts.\n",
        "        \"\"\"\n",
        "        def same_category(idx):\n",
        "            gs_cate = gs[idx].split(\"-\")[-1]\n",
        "            _bound, pred_cate = pred[idx].split(\"-\")\n",
        "            return gs_cate == pred_cate == en_type\n",
        "\n",
        "        return any(same_category(idx) for idx in range(s_idx, e_idx))\n",
3194
        "\n",
3195
        "    @staticmethod\n",
        "    def __check_evaluated_already(gs_dict, cate, start_idx, end_idx):\n",
        "        \"\"\"Tell whether the gold entity overlapping a predicted span was already used up.\n",
        "\n",
        "        `gs_dict` maps (category, start, end) gold spans to the number of times\n",
        "        they may still be credited. The first gold span of category `cate` that\n",
        "        overlaps [start_idx, end_idx) decides the answer: if its remaining count\n",
        "        is 0 the method returns True; otherwise the count is decremented and the\n",
        "        method returns False. False is also returned when no gold span overlaps.\n",
        "        \"\"\"\n",
        "        for key, remaining in gs_dict.items():\n",
        "            c, s, e = key\n",
        "            # Both spans are half-open [start, end), so two spans that merely\n",
        "            # touch (e == start_idx or s == end_idx) do NOT overlap.\n",
        "            if c == cate and s < end_idx and start_idx < e:\n",
        "                if remaining == 0:\n",
        "                    return True\n",
        "                gs_dict[key] -= 1\n",
        "                return False\n",
        "        return False\n",
3206
        "\n",
3207
        "    def __process_bio(self, gs_bio, pred_bio):\n",
        "        \"\"\"Update the running counters with one sentence.\n",
        "\n",
        "        `gs_bio` and `pred_bio` are the gold and predicted tag sequences of the\n",
        "        same sentence. Token accuracy is recorded first, then the gold spans are\n",
        "        counted, and finally every predicted span is recorded as a strict match,\n",
        "        a relaxed-only match or a miss. A relaxed-only match whose gold span was\n",
        "        already used up is not recorded at all.\n",
        "        \"\"\"\n",
        "        # measure acc\n",
        "        for gs_word, pred_word in zip(gs_bio, pred_bio):\n",
        "            if gs_word == pred_word:\n",
        "                self.acc.add_true_case()\n",
        "            else:\n",
        "                self.acc.add_false_case()\n",
        "\n",
        "        llen = len(gs_bio)\n",
        "\n",
        "        def read_span(tags, start):\n",
        "            # category of the tag at `start` and the index just past its i- run\n",
        "            _, cate = tags[start].strip().split('-')\n",
        "            end = start + 1\n",
        "            while end < llen and tags[end].strip() == f\"i-{cate}\":\n",
        "                end += 1\n",
        "            return cate, end\n",
        "\n",
        "        # process gold standard\n",
        "        gs_dict = defaultdict(int)\n",
        "        cur_idx = 0\n",
        "        while cur_idx < llen:\n",
        "            if gs_bio[cur_idx].strip() in self.label_not_for_eval:\n",
        "                cur_idx += 1\n",
        "                continue\n",
        "            cate, end_idx = read_span(gs_bio, cur_idx)\n",
        "            self.gs_all += 1\n",
        "            self.gs_cat[cate] += 1\n",
        "            gs_dict[(cate, cur_idx, end_idx)] += 1\n",
        "            cur_idx = end_idx\n",
        "\n",
        "        # process predictions\n",
        "        cur_idx = 0\n",
        "        while cur_idx < llen:\n",
        "            if pred_bio[cur_idx].strip() in self.label_not_for_eval:\n",
        "                cur_idx += 1\n",
        "                continue\n",
        "            start_idx = cur_idx\n",
        "            cate, end_idx = read_span(pred_bio, start_idx)\n",
        "            # the next scan always resumes right after this span\n",
        "            cur_idx = end_idx\n",
        "\n",
        "            if self.__strict_match(gs_bio, pred_bio, start_idx, end_idx, cate):\n",
        "                strict_hit, relax_hit = True, True\n",
        "            elif self.__relax_match(gs_bio, pred_bio, start_idx, end_idx, cate):\n",
        "                if self.__check_evaluated_already(gs_dict, cate, start_idx, end_idx):\n",
        "                    continue\n",
        "                strict_hit, relax_hit = False, True\n",
        "            else:\n",
        "                strict_hit, relax_hit = False, False\n",
        "\n",
        "            if strict_hit:\n",
        "                self.all_strict.add_true_case()\n",
        "                self.cat_strict[cate].add_true_case()\n",
        "            else:\n",
        "                self.all_strict.add_false_case()\n",
        "                self.cat_strict[cate].add_false_case()\n",
        "            if relax_hit:\n",
        "                self.all_relax.add_true_case()\n",
        "                self.cat_relax[cate].add_true_case()\n",
        "            else:\n",
        "                self.all_relax.add_false_case()\n",
        "                self.cat_relax[cate].add_false_case()\n",
3263
        "\n",
3264
        "    def eval_file(self, gs_file, pred_file):\n",
3265
        "        print(\"processing gold standard file: {} and prediciton file: {}\".format(gs_file, pred_file))\n",
3266
        "        pred_bio_sents = load_bio_file_into_sents(pred_file, do_lower=True)\n",
3267
        "        gs_bio_sents = load_bio_file_into_sents(gs_file, do_lower=True)\n",
3268
        "        # process bio data\n",
3269
        "        # check two data have same amount of sents\n",
3270
        "        assert len(gs_bio_sents) == len(pred_bio_sents), \\\n",
3271
        "            \"gold standard and prediction have different dimension: gs: {}; pred: {}\".format(len(gs_bio_sents), len(pred_bio_sents))\n",
3272
        "        # measure performance\n",
3273
        "        for s_idx, (gs_sent, pred_sent) in enumerate(zip(gs_bio_sents, pred_bio_sents)):\n",
3274
        "            # check two sents have same No. of words\n",
3275
        "            assert len(gs_sent) == len(pred_sent), \\\n",
3276
        "                \"In {}th sentence, the words counts are different; gs: {}; pred: {}\".format(s_idx, gs_sent, pred_sent)\n",
3277
        "            gs_sent = list(map(lambda x: x[-1], gs_sent))\n",
3278
        "            pred_sent = list(map(lambda x: x[-1], pred_sent))\n",
3279
        "            self.__process_bio(gs_sent, pred_sent)\n",
3280
        "        # get the evaluation matrix\n",
3281
        "        self.__measure_performance()\n",
3282
        "        self.__measure_counts()\n",
3283
        "\n",
3284
        "    def eval_mem(self, gs, pred, do_flat=False):\n",
3285
        "        # flat sents to sent; we assume input sequences only have 1 dimension (only labels)\n",
3286
        "        if do_flat:\n",
3287
        "            print('Sentences have been flatten to 1 dim.')\n",
3288
        "            gs = list(chain(*gs))\n",
3289
        "            pred = list(chain(*pred))\n",
3290
        "            gs = list(map(lambda x: x.lower(), gs))\n",
3291
        "            pred = list(map(lambda x: x.lower(), pred))\n",
3292
        "            self.__process_bio(gs, pred)\n",
3293
        "        else:\n",
3294
        "            for sidx, (gs_s, pred_s) in enumerate(zip(gs, pred)):\n",
3295
        "                gs_s = list(map(lambda x: x.lower(), gs_s))\n",
3296
        "                pred_s = list(map(lambda x: x.lower(), pred_s))\n",
3297
        "                self.__process_bio(gs_s, pred_s)\n",
3298
        "\n",
3299
        "        self.__measure_performance()\n",
3300
        "        self.__measure_counts()\n",
3301
        "\n",
3302
        "    def evaluate_annotations(self, gs, pred, do_lower=False):\n",
3303
        "        for gs_sent, pred_sent in zip(gs, pred):\n",
3304
        "            if do_lower:\n",
3305
        "              gs_sent = list(map(lambda x: x.lower(), gs_sent))\n",
3306
        "              pred_sent = list(map(lambda x: x.lower(), pred_sent))\n",
3307
        "            self.__process_bio(gs_sent, pred_sent)\n",
3308
        "\n",
3309
        "        self.__measure_performance()\n",
3310
        "        self.__measure_counts()\n",
3311
        "\n",
3312
        "    def get_performance(self):\n",
3313
        "        return self.performance\n",
3314
        "\n",
3315
        "    def get_counts(self):\n",
3316
        "        return self.counts\n",
3317
        "\n",
3318
        "    def save_evaluation(self, file):\n",
3319
        "        with open(file, \"w\") as f:\n",
3320
        "            json.dump(self.performance, f)\n",
3321
        "\n",
3322
        "    def show_evaluation(self, digits=4):\n",
3323
        "        if len(self.performance) == 0:\n",
3324
        "            raise RuntimeError('call eval_mem() first to get the performance attribute')\n",
3325
        "\n",
3326
        "        cate = self.performance['category']['strict'].keys()\n",
3327
        "\n",
3328
        "        headers = ['precision', 'recall', 'f1']\n",
3329
        "        width = max(max([len(c) for c in cate]), len('overall'), digits)\n",
3330
        "        head_fmt = '{:>{width}s} ' + ' {:>9}' * len(headers)\n",
3331
        "\n",
3332
        "        report = head_fmt.format(u'', *headers, width=width)\n",
3333
        "        report += '\\n\\nstrict\\n'\n",
3334
        "\n",
3335
        "        row_fmt = '{:>{width}s} ' + ' {:>9.{digits}f}' * 3 + '\\n'\n",
3336
        "        for c in cate:\n",
3337
        "            precision = self.performance['category']['strict'][c]['precision']\n",
3338
        "            recall = self.performance['category']['strict'][c]['recall']\n",
3339
        "            f1 = self.performance['category']['strict'][c]['f_score']\n",
3340
        "            report += row_fmt.format(c, *[precision, recall, f1], width=width, digits=digits)\n",
3341
        "\n",
3342
        "        report += '\\nrelax\\n'\n",
3343
        "\n",
3344
        "        for c in cate:\n",
3345
        "            precision = self.performance['category']['relax'][c]['precision']\n",
3346
        "            recall = self.performance['category']['relax'][c]['recall']\n",
3347
        "            f1 = self.performance['category']['relax'][c]['f_score']\n",
3348
        "            report += row_fmt.format(c, *[precision, recall, f1], width=width, digits=digits)\n",
3349
        "\n",
3350
        "        report += '\\n\\noverall\\n'\n",
3351
        "        report += 'acc: ' + str(self.performance['overall']['acc'])\n",
3352
        "        report += '\\nstrict\\n'\n",
3353
        "        report += row_fmt.format('', *[self.performance['overall']['strict']['precision'],\n",
3354
        "                                       self.performance['overall']['strict']['recall'],\n",
3355
        "                                       self.performance['overall']['strict']['f_score']], width=width, digits=digits)\n",
3356
        "\n",
3357
        "        report += '\\nrelax\\n'\n",
3358
        "        report += row_fmt.format('', *[self.performance['overall']['relax']['precision'],\n",
3359
        "                                       self.performance['overall']['relax']['recall'],\n",
3360
        "                                       self.performance['overall']['relax']['f_score']], width=width, digits=digits)\n",
3361
        "        return report\n"
3362
      ],
3363
      "metadata": {
3364
        "id": "wuEpADrGuC8X"
3365
      },
3366
      "execution_count": 39,
3367
      "outputs": []
3368
    },
3369
    {
3370
      "cell_type": "code",
3371
      "execution_count": 97,
3372
      "metadata": {
3373
        "id": "z0TLBcj8WJJ3"
3374
      },
3375
      "outputs": [],
3376
      "source": [
3377
        "evaluator = BioEval()"
3378
      ]
3379
    },
3380
    {
3381
      "cell_type": "code",
3382
      "execution_count": 119,
3383
      "metadata": {
3384
        "id": "ZQUMBCTGWJJ_"
3385
      },
3386
      "outputs": [],
3387
      "source": [
3388
        "evaluator.evaluate_annotations(true_labels[:50], pred_labels, do_lower=True)"
3389
      ]
3390
    },
3391
    {
3392
      "cell_type": "code",
3393
      "execution_count": 106,
3394
      "metadata": {
3395
        "colab": {
3396
          "base_uri": "https://localhost:8080/"
3397
        },
3398
        "id": "BqGc_EEzWJJ_",
3399
        "outputId": "49d087c8-72db-4296-a3d6-a696c3585138"
3400
      },
3401
      "outputs": [
3402
        {
3403
          "output_type": "execute_result",
3404
          "data": {
3405
            "text/plain": [
3406
              "{'overall': {'acc': 0.5583,\n",
3407
              "  'strict': {'precision': 0.4392764857881137,\n",
3408
              "   'recall': 0.5862068965517241,\n",
3409
              "   'f_score': 0.5022156573116692},\n",
3410
              "  'relax': {'precision': 0.5736434108527132,\n",
3411
              "   'recall': 0.7655172413793103,\n",
3412
              "   'f_score': 0.6558345642540621}},\n",
3413
              " 'category': {'strict': {'condition': {'precision': 0.48424068767908307,\n",
3414
              "    'recall': 0.7041666666666667,\n",
3415
              "    'f_score': 0.5738539898132428},\n",
3416
              "   'drug': {'precision': 0.07142857142857142,\n",
3417
              "    'recall': 0.07692307692307693,\n",
3418
              "    'f_score': 0.07407407407407408},\n",
3419
              "   'value': {'precision': 0.0, 'recall': 0.0, 'f_score': 0.0},\n",
3420
              "   'temporal': {'precision': 0.0, 'recall': 0.0, 'f_score': 0.0},\n",
3421
              "   'person': {'precision': 0.0, 'recall': 0.0, 'f_score': 0.0},\n",
3422
              "   'measurement': {'precision': 0.0, 'recall': 0.0, 'f_score': 0.0}},\n",
3423
              "  'relax': {'condition': {'precision': 0.6160458452722063,\n",
3424
              "    'recall': 0.8958333333333334,\n",
3425
              "    'f_score': 0.7300509337860781},\n",
3426
              "   'drug': {'precision': 0.07142857142857142,\n",
3427
              "    'recall': 0.07692307692307693,\n",
3428
              "    'f_score': 0.07407407407407408},\n",
3429
              "   'value': {'precision': 1.0, 'recall': 0.3333333333333333, 'f_score': 0.5},\n",
3430
              "   'temporal': {'precision': 0.5,\n",
3431
              "    'recall': 0.14285714285714285,\n",
3432
              "    'f_score': 0.22222222222222224},\n",
3433
              "   'person': {'precision': 0.0, 'recall': 0.0, 'f_score': 0.0},\n",
3434
              "   'measurement': {'precision': 0.5,\n",
3435
              "    'recall': 0.2222222222222222,\n",
3436
              "    'f_score': 0.30769230769230765}}}}"
3437
            ]
3438
          },
3439
          "metadata": {},
3440
          "execution_count": 106
3441
        }
3442
      ],
3443
      "source": [
3444
        "evaluator.performance"
3445
      ]
3446
    },
3447
    {
3448
      "cell_type": "code",
3449
      "execution_count": 120,
3450
      "metadata": {
3451
        "id": "O1hEM9uCWJJ_"
3452
      },
3453
      "outputs": [],
3454
      "source": [
3455
        "evaluator.save_evaluation('eval_p1.json')"
3456
      ]
3457
    },
3458
    {
3459
      "cell_type": "markdown",
3460
      "metadata": {
3461
        "id": "GGtVORAWWJJ_"
3462
      },
3463
      "source": [
3464
        "**Evaluating prompt 2**"
3465
      ]
3466
    },
3467
    {
3468
      "cell_type": "code",
3469
      "execution_count": 60,
3470
      "metadata": {
3471
        "id": "XxOxk2dkWJJ_"
3472
      },
3473
      "outputs": [],
3474
      "source": [
3475
        "def parse_ann2bio(sentence, pattern, pattern1, pattern2):\n",
3476
        "    # if sentence[-1] == \"\\n\":\n",
3477
        "    #     sentence = sentence[:-2] # remove the \\n and a final point wrongly added\n",
3478
        "    # else:\n",
3479
        "    #     sentence = sentence[:-1] # remove the final point wrongly added\n",
3480
        "\n",
3481
        "    # find the entities\n",
3482
        "    occurrences = re.finditer(pattern, sentence)\n",
3483
        "    indexes = [(match.start(), match.end()) for match in occurrences]\n",
3484
        "\n",
3485
        "    annotation = []\n",
3486
        "    i = 0\n",
3487
        "\n",
3488
        "\n",
3489
        "    # create the bio list\n",
3490
        "    for beg, end in indexes:\n",
3491
        "        if beg > i:\n",
3492
        "            annotation.extend([(word, \"O\") for word in sentence[i:beg].split()])\n",
3493
        "        entity = sentence[beg:end]\n",
3494
        "        entity_name = re.search(pattern1, entity).group(1)\n",
3495
        "        entity = entity.replace(f'<{entity_name}>', \"\").replace(f'</{entity_name}>', \"\")\n",
3496
        "        split_entity = entity.split()\n",
3497
        "        annotation.append((split_entity[0], \"B-\" + entity_name))\n",
3498
        "        annotation.extend([(word, \"I-\" + entity_name) for word in split_entity[1:]])\n",
3499
        "        i = end\n",
3500
        "    annotation.extend([(word, \"O\") for word in sentence[i:].split()])\n",
3501
        "\n",
3502
        "    # check punctuation sign in tokens and put them as individual tokens\n",
3503
        "    ps = r'(\\.|\\,|\\:|\\;|\\!|\\?|\\-|\\(|\\)|\\[|\\]|\\{|\\}|\\\")'\n",
3504
        "    new_annotation = []\n",
3505
        "    for i,(word, tag) in enumerate(annotation):\n",
3506
        "        if re.search(ps, word):\n",
3507
        "            # find the ocurrences of the punctuation signs\n",
3508
        "            occurrences = re.finditer(ps, word)\n",
3509
        "            indexes = [(match.start(), match.end()) for match in occurrences]\n",
3510
        "            # create the new tokens\n",
3511
        "            last = 0\n",
3512
        "            for j, (beg, end) in enumerate(indexes):\n",
3513
        "                if beg > last:\n",
3514
        "                    new_annotation.append((word[last:beg], tag))\n",
3515
        "                if tag != \"O\":\n",
3516
        "                    label = f'I-{tag.split(\"-\")[1]}'\n",
3517
        "                else:\n",
3518
        "                    label = \"O\"\n",
3519
        "                if end < len(word) or (i < len(annotation) - 1 and annotation[i+1][1] == label):\n",
3520
        "                    new_annotation.append((word[beg:end], label))\n",
3521
        "                else:\n",
3522
        "                    new_annotation.append((word[beg:end], 'O'))\n",
3523
        "                last = end\n",
3524
        "            if last < len(word):\n",
3525
        "                new_annotation.append((word[last:], label))\n",
3526
        "\n",
3527
        "        else:\n",
3528
        "            new_annotation.append((word, tag))\n",
3529
        "\n",
3530
        "\n",
3531
        "    return new_annotation"
3532
      ]
3533
    },
3534
    {
3535
      "cell_type": "code",
3536
      "execution_count": 45,
3537
      "metadata": {
3538
        "id": "Pe0s7piTWJKA"
3539
      },
3540
      "outputs": [],
3541
      "source": [
3542
        "pattern1 = r'<(Person|Condition|Value|Drug|Procedure|Measurement|Temporal|Observation|Device)>'\n",
3543
        "pattern2 = r'</(Person|Condition|Value|Drug|Procedure|Measurement|Temporal|Observation|Device)>'\n",
3544
        "pattern = f'{pattern1}.*?{pattern2}'"
3545
      ]
3546
    },
3547
    {
3548
      "cell_type": "code",
3549
      "source": [
3550
        "generated_sentences_p2[2], dataset['test'][2]"
3551
      ],
3552
      "metadata": {
3553
        "colab": {
3554
          "base_uri": "https://localhost:8080/"
3555
        },
3556
        "id": "w99p-fMiMN1K",
3557
        "outputId": "1ef5368d-f251-48e3-ab93-a02f61d29127"
3558
      },
3559
      "execution_count": 59,
3560
      "outputs": [
3561
        {
3562
          "output_type": "execute_result",
3563
          "data": {
3564
            "text/plain": [
3565
              "(' Current treatment with <Drug>Telbivudine</Drug>',\n",
3566
              " {'tokens': ['Current', 'treatment', 'with', 'Telbivudine'],\n",
3567
              "  'ner_tags': [0, 0, 0, 5],\n",
3568
              "  'file': 'NCT01373684_exc.bio.txt',\n",
3569
              "  'index': 1,\n",
3570
              "  'text': 'Current treatment with Telbivudine'})"
3571
            ]
3572
          },
3573
          "metadata": {},
3574
          "execution_count": 59
3575
        }
3576
      ]
3577
    },
3578
    {
3579
      "cell_type": "code",
3580
      "execution_count": 61,
3581
      "metadata": {
3582
        "colab": {
3583
          "base_uri": "https://localhost:8080/"
3584
        },
3585
        "id": "HjVitXo0WJKA",
3586
        "outputId": "0572b8f5-e724-49d1-abd5-3ee699c7876e"
3587
      },
3588
      "outputs": [
3589
        {
3590
          "output_type": "execute_result",
3591
          "data": {
3592
            "text/plain": [
3593
              "50"
3594
            ]
3595
          },
3596
          "metadata": {},
3597
          "execution_count": 61
3598
        }
3599
      ],
3600
      "source": [
3601
        "new_p2_annotations = []\n",
3602
        "for sent in generated_sentences_p2:\n",
3603
        "    annotation = parse_ann2bio(sent, pattern, pattern1, pattern2)\n",
3604
        "    new_p2_annotations.append(annotation)\n",
3605
        "len(new_p2_annotations)"
3606
      ]
3607
    },
3608
    {
3609
      "cell_type": "code",
3610
      "source": [
3611
        "new_p2_annotations[0]"
3612
      ],
3613
      "metadata": {
3614
        "colab": {
3615
          "base_uri": "https://localhost:8080/"
3616
        },
3617
        "id": "jTVKu9D8K_jI",
3618
        "outputId": "86cc58a6-ed81-45e0-fe2a-504ef58d4996"
3619
      },
3620
      "execution_count": 62,
3621
      "outputs": [
3622
        {
3623
          "output_type": "execute_result",
3624
          "data": {
3625
            "text/plain": [
3626
              "[('self', 'O'),\n",
3627
              " ('-', 'O'),\n",
3628
              " ('reported', 'O'),\n",
3629
              " ('healthy', 'O'),\n",
3630
              " ('adults', 'O'),\n",
3631
              " ('between', 'O'),\n",
3632
              " ('the', 'O'),\n",
3633
              " ('ages', 'O'),\n",
3634
              " ('of', 'O'),\n",
3635
              " ('18', 'B-Measurement'),\n",
3636
              " ('-', 'I-Measurement'),\n",
3637
              " ('60', 'I-Measurement'),\n",
3638
              " ('who', 'O'),\n",
3639
              " ('are', 'O'),\n",
3640
              " ('fluent', 'O'),\n",
3641
              " ('in', 'O'),\n",
3642
              " ('<Language>English</Language>', 'O'),\n",
3643
              " ('.', 'O')]"
3644
            ]
3645
          },
3646
          "metadata": {},
3647
          "execution_count": 62
3648
        }
3649
      ]
3650
    },
3651
    {
3652
      "cell_type": "code",
3653
      "execution_count": 63,
3654
      "metadata": {
3655
        "colab": {
3656
          "base_uri": "https://localhost:8080/"
3657
        },
3658
        "id": "gWX_IOuIWJKA",
3659
        "outputId": "7faad3c7-ee19-4be3-ee3b-61084403b635"
3660
      },
3661
      "outputs": [
3662
        {
3663
          "output_type": "execute_result",
3664
          "data": {
3665
            "text/plain": [
3666
              "50"
3667
            ]
3668
          },
3669
          "metadata": {},
3670
          "execution_count": 63
3671
        }
3672
      ],
3673
      "source": [
3674
        "p2_annotations = []\n",
3675
        "for sent in new_p2_annotations:\n",
3676
        "    dicc_sent = {\"tokens\":[], \"ner_tags\":[]}\n",
3677
        "    for word, tag in sent:\n",
3678
        "        dicc_sent[\"tokens\"].append(word)\n",
3679
        "        dicc_sent[\"ner_tags\"].append(sel_ent[tag])\n",
3680
        "    p2_annotations.append(dicc_sent)\n",
3681
        "len(p2_annotations)"
3682
      ]
3683
    },
3684
    {
3685
      "cell_type": "code",
3686
      "execution_count": 64,
3687
      "metadata": {
3688
        "id": "EnakJGL7WJKA"
3689
      },
3690
      "outputs": [],
3691
      "source": [
3692
        "p2_df = pd.DataFrame(p2_annotations)\n",
3693
        "p2_dataset = Dataset.from_pandas(p2_df)"
3694
      ]
3695
    },
3696
    {
3697
      "cell_type": "code",
3698
      "execution_count": 65,
3699
      "metadata": {
3700
        "colab": {
3701
          "base_uri": "https://localhost:8080/",
3702
          "height": 49,
3703
          "referenced_widgets": [
3704
            "a30bdf5afc2b4e078d8200d34b4760d2",
3705
            "7c864990c29f4f598cd3fdd02550d7b5",
3706
            "63640367c9494489a91988bb7c22b2b7",
3707
            "177e85135bd644c5b6f283237dd09361",
3708
            "9b5c5002cf264eb899c48390494d167a",
3709
            "d6d2c07d5f46410dbab13d86741f2be7",
3710
            "cab0297928774994b780ec733557af07",
3711
            "8b4d9c95b13c4c21bdd1c0295fc2be49",
3712
            "e246490ae7b14adbaa0e7a45fd177721",
3713
            "fced4d6e50b44fee950d1436e5618f82",
3714
            "12d7befb6aa644c7acad49e4e1c93167"
3715
          ]
3716
        },
3717
        "id": "uxCAUki5WJKA",
3718
        "outputId": "e86c7104-1a79-417d-d5bb-4e440c9f0d9b"
3719
      },
3720
      "outputs": [
3721
        {
3722
          "output_type": "display_data",
3723
          "data": {
3724
            "text/plain": [
3725
              "Map:   0%|          | 0/50 [00:00<?, ? examples/s]"
3726
            ],
3727
            "application/vnd.jupyter.widget-view+json": {
3728
              "version_major": 2,
3729
              "version_minor": 0,
3730
              "model_id": "a30bdf5afc2b4e078d8200d34b4760d2"
3731
            }
3732
          },
3733
          "metadata": {}
3734
        }
3735
      ],
3736
      "source": [
3737
        "p2_dataset = p2_dataset.map(tokenize_and_align_labels, batched=True)"
3738
      ]
3739
    },
3740
    {
3741
      "cell_type": "code",
3742
      "execution_count": 66,
3743
      "metadata": {
3744
        "colab": {
3745
          "base_uri": "https://localhost:8080/"
3746
        },
3747
        "id": "Kj_cPZ6XWJKA",
3748
        "outputId": "0c9d5d1e-ca25-4300-ef4c-88ee0939a5f2"
3749
      },
3750
      "outputs": [
3751
        {
3752
          "output_type": "stream",
3753
          "name": "stdout",
3754
          "text": [
3755
            "0.72\n"
3756
          ]
3757
        }
3758
      ],
3759
      "source": [
3760
        "# keep just sentences with the same length\n",
3761
        "sentences_to_evaluate_p2 = []\n",
3762
        "sentences_to_evaluate_true = []\n",
3763
        "\n",
3764
        "for i in range(len(p2_dataset)):\n",
3765
        "    if len(p2_dataset['labels'][i]) == len(true_ann_dataset['labels'][i]):\n",
3766
        "        sentences_to_evaluate_p2.append(p2_dataset['labels'][i])\n",
3767
        "        sentences_to_evaluate_true.append(true_ann_dataset['labels'][i])\n",
3768
        "\n",
3769
        "print(len(sentences_to_evaluate_p2)/len(p2_dataset))"
3770
      ]
3771
    },
3772
    {
3773
      "cell_type": "code",
3774
      "execution_count": 67,
3775
      "metadata": {
3776
        "id": "iFZTUVmlWJKA"
3777
      },
3778
      "outputs": [],
3779
      "source": [
3780
        "evaluator = BioEval()"
3781
      ]
3782
    },
3783
    {
3784
      "cell_type": "code",
3785
      "execution_count": 68,
3786
      "metadata": {
3787
        "id": "T-SCSbQSWJKA"
3788
      },
3789
      "outputs": [],
3790
      "source": [
3791
        "pred_labels, true_labels = get_labels((sentences_to_evaluate_p2, sentences_to_evaluate_true))"
3792
      ]
3793
    },
3794
    {
3795
      "cell_type": "code",
3796
      "execution_count": 71,
3797
      "metadata": {
3798
        "id": "zo2c6-fDWJKA"
3799
      },
3800
      "outputs": [],
3801
      "source": [
3802
        "evaluator.evaluate_annotations(true_labels, pred_labels, do_lower=True)"
3803
      ]
3804
    },
3805
    {
3806
      "cell_type": "code",
3807
      "execution_count": 72,
3808
      "metadata": {
3809
        "colab": {
3810
          "base_uri": "https://localhost:8080/"
3811
        },
3812
        "id": "UrJPzn__WJKB",
3813
        "outputId": "1acf6b29-a3b2-4d90-ad1f-895c66429caa"
3814
      },
3815
      "outputs": [
3816
        {
3817
          "output_type": "execute_result",
3818
          "data": {
3819
            "text/plain": [
3820
              "{'overall': {'acc': 0.6897,\n",
3821
              "  'strict': {'precision': 0.628158844765343,\n",
3822
              "   'recall': 0.5958904109589042,\n",
3823
              "   'f_score': 0.6115992970123024},\n",
3824
              "  'relax': {'precision': 0.7075812274368231,\n",
3825
              "   'recall': 0.6712328767123288,\n",
3826
              "   'f_score': 0.6889279437609842}},\n",
3827
              " 'category': {'strict': {'drug': {'precision': 0.15384615384615385,\n",
3828
              "    'recall': 0.11764705882352941,\n",
3829
              "    'f_score': 0.13333333333333333},\n",
3830
              "   'condition': {'precision': 0.7056277056277056,\n",
3831
              "    'recall': 0.6965811965811965,\n",
3832
              "    'f_score': 0.7010752688172043},\n",
3833
              "   'measurement': {'precision': 0.25, 'recall': 0.25, 'f_score': 0.25},\n",
3834
              "   'temporal': {'precision': 0.0, 'recall': 0.0, 'f_score': 0.0},\n",
3835
              "   'procedure': {'precision': 0.42857142857142855,\n",
3836
              "    'recall': 0.5454545454545454,\n",
3837
              "    'f_score': 0.4799999999999999},\n",
3838
              "   'value': {'precision': 0.5,\n",
3839
              "    'recall': 0.14285714285714285,\n",
3840
              "    'f_score': 0.22222222222222224},\n",
3841
              "   'observation': {'precision': 0.0, 'recall': 0.0, 'f_score': 0.0},\n",
3842
              "   'person': {'precision': 0.0, 'recall': 0.0, 'f_score': 0.0}},\n",
3843
              "  'relax': {'drug': {'precision': 0.3076923076923077,\n",
3844
              "    'recall': 0.23529411764705882,\n",
3845
              "    'f_score': 0.26666666666666666},\n",
3846
              "   'condition': {'precision': 0.7792207792207793,\n",
3847
              "    'recall': 0.7692307692307693,\n",
3848
              "    'f_score': 0.7741935483870968},\n",
3849
              "   'measurement': {'precision': 0.25, 'recall': 0.25, 'f_score': 0.25},\n",
3850
              "   'temporal': {'precision': 0.6666666666666666,\n",
3851
              "    'recall': 0.18181818181818182,\n",
3852
              "    'f_score': 0.28571428571428575},\n",
3853
              "   'procedure': {'precision': 0.5,\n",
3854
              "    'recall': 0.6363636363636364,\n",
3855
              "    'f_score': 0.56},\n",
3856
              "   'value': {'precision': 0.5,\n",
3857
              "    'recall': 0.14285714285714285,\n",
3858
              "    'f_score': 0.22222222222222224},\n",
3859
              "   'observation': {'precision': 0.0, 'recall': 0.0, 'f_score': 0.0},\n",
3860
              "   'person': {'precision': 0.0, 'recall': 0.0, 'f_score': 0.0}}}}"
3861
            ]
3862
          },
3863
          "metadata": {},
3864
          "execution_count": 72
3865
        }
3866
      ],
3867
      "source": [
3868
        "evaluator.performance"
3869
      ]
3870
    },
3871
    {
3872
      "cell_type": "code",
3873
      "execution_count": 73,
3874
      "metadata": {
3875
        "id": "l80O9eAfWJKB"
3876
      },
3877
      "outputs": [],
3878
      "source": [
3879
        "evaluator.save_evaluation('eval_p2.json')"
3880
      ]
3881
    },
3882
    {
3883
      "cell_type": "code",
3884
      "source": [
3885
        "evaluator.get_counts()"
3886
      ],
3887
      "metadata": {
3888
        "id": "Le3qW_LTNnaI",
3889
        "outputId": "40f54473-23c9-4b3e-dd63-56e17564442f",
3890
        "colab": {
3891
          "base_uri": "https://localhost:8080/"
3892
        }
3893
      },
3894
      "execution_count": 74,
3895
      "outputs": [
3896
        {
3897
          "output_type": "execute_result",
3898
          "data": {
3899
            "text/plain": [
3900
              "{'expect': {'overall': 292,\n",
3901
              "  'drug': 17,\n",
3902
              "  'condition': 234,\n",
3903
              "  'measurement': 8,\n",
3904
              "  'value': 7,\n",
3905
              "  'procedure': 11,\n",
3906
              "  'temporal': 11,\n",
3907
              "  'observation': 3,\n",
3908
              "  'person': 1},\n",
3909
              " 'prediction': {'strict': {'overall': {'total': 277,\n",
3910
              "    'true': 174,\n",
3911
              "    'false': 103},\n",
3912
              "   'drug': {'total': 13, 'true': 2, 'false': 11},\n",
3913
              "   'condition': {'total': 231, 'true': 163, 'false': 68},\n",
3914
              "   'measurement': {'total': 8, 'true': 2, 'false': 6},\n",
3915
              "   'temporal': {'total': 3, 'true': 0, 'false': 3},\n",
3916
              "   'procedure': {'total': 14, 'true': 6, 'false': 8},\n",
3917
              "   'value': {'total': 2, 'true': 1, 'false': 1},\n",
3918
              "   'observation': {'total': 2, 'true': 0, 'false': 2},\n",
3919
              "   'person': {'total': 4, 'true': 0, 'false': 4}},\n",
3920
              "  'relax': {'overall': {'total': 277, 'true': 196, 'false': 81},\n",
3921
              "   'drug': {'total': 13, 'true': 4, 'false': 9},\n",
3922
              "   'condition': {'total': 231, 'true': 180, 'false': 51},\n",
3923
              "   'measurement': {'total': 8, 'true': 2, 'false': 6},\n",
3924
              "   'temporal': {'total': 3, 'true': 2, 'false': 1},\n",
3925
              "   'procedure': {'total': 14, 'true': 7, 'false': 7},\n",
3926
              "   'value': {'total': 2, 'true': 1, 'false': 1},\n",
3927
              "   'observation': {'total': 2, 'true': 0, 'false': 2},\n",
3928
              "   'person': {'total': 4, 'true': 0, 'false': 4}}}}"
3929
            ]
3930
          },
3931
          "metadata": {},
3932
          "execution_count": 74
3933
        }
3934
      ]
3935
    }
3936
  ],
3937
  "metadata": {
3938
    "kernelspec": {
3939
      "display_name": "Python 3",
3940
      "name": "python3"
3941
    },
3942
    "language_info": {
3943
      "codemirror_mode": {
3944
        "name": "ipython",
3945
        "version": 3
3946
      },
3947
      "file_extension": ".py",
3948
      "mimetype": "text/x-python",
3949
      "name": "python",
3950
      "nbconvert_exporter": "python",
3951
      "pygments_lexer": "ipython3",
3952
      "version": "3.10.13"
3953
    },
3954
    "colab": {
3955
      "provenance": [],
3956
      "gpuType": "T4",
3957
      "include_colab_link": true
3958
    },
3959
    "accelerator": "GPU",
3960
    "widgets": {
3961
      "application/vnd.jupyter.widget-state+json": {
3962
        "0f4e594a6aa64dd697fb841d4207d4b7": {
3963
          "model_module": "@jupyter-widgets/controls",
3964
          "model_name": "VBoxModel",
3965
          "model_module_version": "1.5.0",
3966
          "state": {
3967
            "_dom_classes": [],
3968
            "_model_module": "@jupyter-widgets/controls",
3969
            "_model_module_version": "1.5.0",
3970
            "_model_name": "VBoxModel",
3971
            "_view_count": null,
3972
            "_view_module": "@jupyter-widgets/controls",
3973
            "_view_module_version": "1.5.0",
3974
            "_view_name": "VBoxView",
3975
            "box_style": "",
3976
            "children": [
3977
              "IPY_MODEL_9f151f163b2c4966b9460dd31e184a84",
3978
              "IPY_MODEL_67a9e41b56a741baa4a953905d11c263",
3979
              "IPY_MODEL_e32ad767760d4bddb2f3a0a38327339b",
3980
              "IPY_MODEL_681dac30b90b4b02b38f9e5bb99429c0"
3981
            ],
3982
            "layout": "IPY_MODEL_1fa716b65c7f47989184cc24bd56e5bd"
3983
          }
3984
        },
3985
        "413c1cd08da84e32ba2de36d891bf86a": {
3986
          "model_module": "@jupyter-widgets/controls",
3987
          "model_name": "HTMLModel",
3988
          "model_module_version": "1.5.0",
3989
          "state": {
3990
            "_dom_classes": [],
3991
            "_model_module": "@jupyter-widgets/controls",
3992
            "_model_module_version": "1.5.0",
3993
            "_model_name": "HTMLModel",
3994
            "_view_count": null,
3995
            "_view_module": "@jupyter-widgets/controls",
3996
            "_view_module_version": "1.5.0",
3997
            "_view_name": "HTMLView",
3998
            "description": "",
3999
            "description_tooltip": null,
4000
            "layout": "IPY_MODEL_494b9dacddb948f7b08b509e7a79f3f1",
4001
            "placeholder": "​",
4002
            "style": "IPY_MODEL_e3e2f93363b74e24bf22abcaf021b3dc",
4003
            "value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
4004
          }
4005
        },
4006
        "ba7f75db7de145abb174658862ef50a3": {
4007
          "model_module": "@jupyter-widgets/controls",
4008
          "model_name": "PasswordModel",
4009
          "model_module_version": "1.5.0",
4010
          "state": {
4011
            "_dom_classes": [],
4012
            "_model_module": "@jupyter-widgets/controls",
4013
            "_model_module_version": "1.5.0",
4014
            "_model_name": "PasswordModel",
4015
            "_view_count": null,
4016
            "_view_module": "@jupyter-widgets/controls",
4017
            "_view_module_version": "1.5.0",
4018
            "_view_name": "PasswordView",
4019
            "continuous_update": true,
4020
            "description": "Token:",
4021
            "description_tooltip": null,
4022
            "disabled": false,
4023
            "layout": "IPY_MODEL_b607f0954b3844e18ed2a887372d42c5",
4024
            "placeholder": "​",
4025
            "style": "IPY_MODEL_95176ba773274023b4e3356bb3cb4cc9",
4026
            "value": ""
4027
          }
4028
        },
4029
        "d55f2bd66fe044e4816348c1b4b032bf": {
4030
          "model_module": "@jupyter-widgets/controls",
4031
          "model_name": "CheckboxModel",
4032
          "model_module_version": "1.5.0",
4033
          "state": {
4034
            "_dom_classes": [],
4035
            "_model_module": "@jupyter-widgets/controls",
4036
            "_model_module_version": "1.5.0",
4037
            "_model_name": "CheckboxModel",
4038
            "_view_count": null,
4039
            "_view_module": "@jupyter-widgets/controls",
4040
            "_view_module_version": "1.5.0",
4041
            "_view_name": "CheckboxView",
4042
            "description": "Add token as git credential?",
4043
            "description_tooltip": null,
4044
            "disabled": false,
4045
            "indent": true,
4046
            "layout": "IPY_MODEL_ac4c65187e864fd09e296fcbbe3ad6d8",
4047
            "style": "IPY_MODEL_540f2e543376445d849e1a56ac38e4d4",
4048
            "value": true
4049
          }
4050
        },
4051
        "d1fd6600f76a4bf7968b0d13de093148": {
4052
          "model_module": "@jupyter-widgets/controls",
4053
          "model_name": "ButtonModel",
4054
          "model_module_version": "1.5.0",
4055
          "state": {
4056
            "_dom_classes": [],
4057
            "_model_module": "@jupyter-widgets/controls",
4058
            "_model_module_version": "1.5.0",
4059
            "_model_name": "ButtonModel",
4060
            "_view_count": null,
4061
            "_view_module": "@jupyter-widgets/controls",
4062
            "_view_module_version": "1.5.0",
4063
            "_view_name": "ButtonView",
4064
            "button_style": "",
4065
            "description": "Login",
4066
            "disabled": false,
4067
            "icon": "",
4068
            "layout": "IPY_MODEL_a9d9e19fa5ae41109e7c7cf9a4d8a13c",
4069
            "style": "IPY_MODEL_4fd000047a0a402194cb3b99bb59d8f9",
4070
            "tooltip": ""
4071
          }
4072
        },
4073
        "b00aef23e75445c785a6b3b8756b9c94": {
4074
          "model_module": "@jupyter-widgets/controls",
4075
          "model_name": "HTMLModel",
4076
          "model_module_version": "1.5.0",
4077
          "state": {
4078
            "_dom_classes": [],
4079
            "_model_module": "@jupyter-widgets/controls",
4080
            "_model_module_version": "1.5.0",
4081
            "_model_name": "HTMLModel",
4082
            "_view_count": null,
4083
            "_view_module": "@jupyter-widgets/controls",
4084
            "_view_module_version": "1.5.0",
4085
            "_view_name": "HTMLView",
4086
            "description": "",
4087
            "description_tooltip": null,
4088
            "layout": "IPY_MODEL_9d08585ce6fd40538dc10d1c5d1aee70",
4089
            "placeholder": "​",
4090
            "style": "IPY_MODEL_f4642fc990e94bf99c7d26017d8771ba",
4091
            "value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
4092
          }
4093
        },
4094
        "1fa716b65c7f47989184cc24bd56e5bd": {
4095
          "model_module": "@jupyter-widgets/base",
4096
          "model_name": "LayoutModel",
4097
          "model_module_version": "1.2.0",
4098
          "state": {
4099
            "_model_module": "@jupyter-widgets/base",
4100
            "_model_module_version": "1.2.0",
4101
            "_model_name": "LayoutModel",
4102
            "_view_count": null,
4103
            "_view_module": "@jupyter-widgets/base",
4104
            "_view_module_version": "1.2.0",
4105
            "_view_name": "LayoutView",
4106
            "align_content": null,
4107
            "align_items": "center",
4108
            "align_self": null,
4109
            "border": null,
4110
            "bottom": null,
4111
            "display": "flex",
4112
            "flex": null,
4113
            "flex_flow": "column",
4114
            "grid_area": null,
4115
            "grid_auto_columns": null,
4116
            "grid_auto_flow": null,
4117
            "grid_auto_rows": null,
4118
            "grid_column": null,
4119
            "grid_gap": null,
4120
            "grid_row": null,
4121
            "grid_template_areas": null,
4122
            "grid_template_columns": null,
4123
            "grid_template_rows": null,
4124
            "height": null,
4125
            "justify_content": null,
4126
            "justify_items": null,
4127
            "left": null,
4128
            "margin": null,
4129
            "max_height": null,
4130
            "max_width": null,
4131
            "min_height": null,
4132
            "min_width": null,
4133
            "object_fit": null,
4134
            "object_position": null,
4135
            "order": null,
4136
            "overflow": null,
4137
            "overflow_x": null,
4138
            "overflow_y": null,
4139
            "padding": null,
4140
            "right": null,
4141
            "top": null,
4142
            "visibility": null,
4143
            "width": "50%"
4144
          }
4145
        },
4146
        "494b9dacddb948f7b08b509e7a79f3f1": {
4147
          "model_module": "@jupyter-widgets/base",
4148
          "model_name": "LayoutModel",
4149
          "model_module_version": "1.2.0",
4150
          "state": {
4151
            "_model_module": "@jupyter-widgets/base",
4152
            "_model_module_version": "1.2.0",
4153
            "_model_name": "LayoutModel",
4154
            "_view_count": null,
4155
            "_view_module": "@jupyter-widgets/base",
4156
            "_view_module_version": "1.2.0",
4157
            "_view_name": "LayoutView",
4158
            "align_content": null,
4159
            "align_items": null,
4160
            "align_self": null,
4161
            "border": null,
4162
            "bottom": null,
4163
            "display": null,
4164
            "flex": null,
4165
            "flex_flow": null,
4166
            "grid_area": null,
4167
            "grid_auto_columns": null,
4168
            "grid_auto_flow": null,
4169
            "grid_auto_rows": null,
4170
            "grid_column": null,
4171
            "grid_gap": null,
4172
            "grid_row": null,
4173
            "grid_template_areas": null,
4174
            "grid_template_columns": null,
4175
            "grid_template_rows": null,
4176
            "height": null,
4177
            "justify_content": null,
4178
            "justify_items": null,
4179
            "left": null,
4180
            "margin": null,
4181
            "max_height": null,
4182
            "max_width": null,
4183
            "min_height": null,
4184
            "min_width": null,
4185
            "object_fit": null,
4186
            "object_position": null,
4187
            "order": null,
4188
            "overflow": null,
4189
            "overflow_x": null,
4190
            "overflow_y": null,
4191
            "padding": null,
4192
            "right": null,
4193
            "top": null,
4194
            "visibility": null,
4195
            "width": null
4196
          }
4197
        },
4198
        "e3e2f93363b74e24bf22abcaf021b3dc": {
4199
          "model_module": "@jupyter-widgets/controls",
4200
          "model_name": "DescriptionStyleModel",
4201
          "model_module_version": "1.5.0",
4202
          "state": {
4203
            "_model_module": "@jupyter-widgets/controls",
4204
            "_model_module_version": "1.5.0",
4205
            "_model_name": "DescriptionStyleModel",
4206
            "_view_count": null,
4207
            "_view_module": "@jupyter-widgets/base",
4208
            "_view_module_version": "1.2.0",
4209
            "_view_name": "StyleView",
4210
            "description_width": ""
4211
          }
4212
        },
4213
        "b607f0954b3844e18ed2a887372d42c5": {
4214
          "model_module": "@jupyter-widgets/base",
4215
          "model_name": "LayoutModel",
4216
          "model_module_version": "1.2.0",
4217
          "state": {
4218
            "_model_module": "@jupyter-widgets/base",
4219
            "_model_module_version": "1.2.0",
4220
            "_model_name": "LayoutModel",
4221
            "_view_count": null,
4222
            "_view_module": "@jupyter-widgets/base",
4223
            "_view_module_version": "1.2.0",
4224
            "_view_name": "LayoutView",
4225
            "align_content": null,
4226
            "align_items": null,
4227
            "align_self": null,
4228
            "border": null,
4229
            "bottom": null,
4230
            "display": null,
4231
            "flex": null,
4232
            "flex_flow": null,
4233
            "grid_area": null,
4234
            "grid_auto_columns": null,
4235
            "grid_auto_flow": null,
4236
            "grid_auto_rows": null,
4237
            "grid_column": null,
4238
            "grid_gap": null,
4239
            "grid_row": null,
4240
            "grid_template_areas": null,
4241
            "grid_template_columns": null,
4242
            "grid_template_rows": null,
4243
            "height": null,
4244
            "justify_content": null,
4245
            "justify_items": null,
4246
            "left": null,
4247
            "margin": null,
4248
            "max_height": null,
4249
            "max_width": null,
4250
            "min_height": null,
4251
            "min_width": null,
4252
            "object_fit": null,
4253
            "object_position": null,
4254
            "order": null,
4255
            "overflow": null,
4256
            "overflow_x": null,
4257
            "overflow_y": null,
4258
            "padding": null,
4259
            "right": null,
4260
            "top": null,
4261
            "visibility": null,
4262
            "width": null
4263
          }
4264
        },
4265
        "95176ba773274023b4e3356bb3cb4cc9": {
4266
          "model_module": "@jupyter-widgets/controls",
4267
          "model_name": "DescriptionStyleModel",
4268
          "model_module_version": "1.5.0",
4269
          "state": {
4270
            "_model_module": "@jupyter-widgets/controls",
4271
            "_model_module_version": "1.5.0",
4272
            "_model_name": "DescriptionStyleModel",
4273
            "_view_count": null,
4274
            "_view_module": "@jupyter-widgets/base",
4275
            "_view_module_version": "1.2.0",
4276
            "_view_name": "StyleView",
4277
            "description_width": ""
4278
          }
4279
        },
4280
        "ac4c65187e864fd09e296fcbbe3ad6d8": {
4281
          "model_module": "@jupyter-widgets/base",
4282
          "model_name": "LayoutModel",
4283
          "model_module_version": "1.2.0",
4284
          "state": {
4285
            "_model_module": "@jupyter-widgets/base",
4286
            "_model_module_version": "1.2.0",
4287
            "_model_name": "LayoutModel",
4288
            "_view_count": null,
4289
            "_view_module": "@jupyter-widgets/base",
4290
            "_view_module_version": "1.2.0",
4291
            "_view_name": "LayoutView",
4292
            "align_content": null,
4293
            "align_items": null,
4294
            "align_self": null,
4295
            "border": null,
4296
            "bottom": null,
4297
            "display": null,
4298
            "flex": null,
4299
            "flex_flow": null,
4300
            "grid_area": null,
4301
            "grid_auto_columns": null,
4302
            "grid_auto_flow": null,
4303
            "grid_auto_rows": null,
4304
            "grid_column": null,
4305
            "grid_gap": null,
4306
            "grid_row": null,
4307
            "grid_template_areas": null,
4308
            "grid_template_columns": null,
4309
            "grid_template_rows": null,
4310
            "height": null,
4311
            "justify_content": null,
4312
            "justify_items": null,
4313
            "left": null,
4314
            "margin": null,
4315
            "max_height": null,
4316
            "max_width": null,
4317
            "min_height": null,
4318
            "min_width": null,
4319
            "object_fit": null,
4320
            "object_position": null,
4321
            "order": null,
4322
            "overflow": null,
4323
            "overflow_x": null,
4324
            "overflow_y": null,
4325
            "padding": null,
4326
            "right": null,
4327
            "top": null,
4328
            "visibility": null,
4329
            "width": null
4330
          }
4331
        },
4332
        "540f2e543376445d849e1a56ac38e4d4": {
4333
          "model_module": "@jupyter-widgets/controls",
4334
          "model_name": "DescriptionStyleModel",
4335
          "model_module_version": "1.5.0",
4336
          "state": {
4337
            "_model_module": "@jupyter-widgets/controls",
4338
            "_model_module_version": "1.5.0",
4339
            "_model_name": "DescriptionStyleModel",
4340
            "_view_count": null,
4341
            "_view_module": "@jupyter-widgets/base",
4342
            "_view_module_version": "1.2.0",
4343
            "_view_name": "StyleView",
4344
            "description_width": ""
4345
          }
4346
        },
4347
        "a9d9e19fa5ae41109e7c7cf9a4d8a13c": {
4348
          "model_module": "@jupyter-widgets/base",
4349
          "model_name": "LayoutModel",
4350
          "model_module_version": "1.2.0",
4351
          "state": {
4352
            "_model_module": "@jupyter-widgets/base",
4353
            "_model_module_version": "1.2.0",
4354
            "_model_name": "LayoutModel",
4355
            "_view_count": null,
4356
            "_view_module": "@jupyter-widgets/base",
4357
            "_view_module_version": "1.2.0",
4358
            "_view_name": "LayoutView",
4359
            "align_content": null,
4360
            "align_items": null,
4361
            "align_self": null,
4362
            "border": null,
4363
            "bottom": null,
4364
            "display": null,
4365
            "flex": null,
4366
            "flex_flow": null,
4367
            "grid_area": null,
4368
            "grid_auto_columns": null,
4369
            "grid_auto_flow": null,
4370
            "grid_auto_rows": null,
4371
            "grid_column": null,
4372
            "grid_gap": null,
4373
            "grid_row": null,
4374
            "grid_template_areas": null,
4375
            "grid_template_columns": null,
4376
            "grid_template_rows": null,
4377
            "height": null,
4378
            "justify_content": null,
4379
            "justify_items": null,
4380
            "left": null,
4381
            "margin": null,
4382
            "max_height": null,
4383
            "max_width": null,
4384
            "min_height": null,
4385
            "min_width": null,
4386
            "object_fit": null,
4387
            "object_position": null,
4388
            "order": null,
4389
            "overflow": null,
4390
            "overflow_x": null,
4391
            "overflow_y": null,
4392
            "padding": null,
4393
            "right": null,
4394
            "top": null,
4395
            "visibility": null,
4396
            "width": null
4397
          }
4398
        },
4399
        "4fd000047a0a402194cb3b99bb59d8f9": {
4400
          "model_module": "@jupyter-widgets/controls",
4401
          "model_name": "ButtonStyleModel",
4402
          "model_module_version": "1.5.0",
4403
          "state": {
4404
            "_model_module": "@jupyter-widgets/controls",
4405
            "_model_module_version": "1.5.0",
4406
            "_model_name": "ButtonStyleModel",
4407
            "_view_count": null,
4408
            "_view_module": "@jupyter-widgets/base",
4409
            "_view_module_version": "1.2.0",
4410
            "_view_name": "StyleView",
4411
            "button_color": null,
4412
            "font_weight": ""
4413
          }
4414
        },
4415
        "9d08585ce6fd40538dc10d1c5d1aee70": {
4416
          "model_module": "@jupyter-widgets/base",
4417
          "model_name": "LayoutModel",
4418
          "model_module_version": "1.2.0",
4419
          "state": {
4420
            "_model_module": "@jupyter-widgets/base",
4421
            "_model_module_version": "1.2.0",
4422
            "_model_name": "LayoutModel",
4423
            "_view_count": null,
4424
            "_view_module": "@jupyter-widgets/base",
4425
            "_view_module_version": "1.2.0",
4426
            "_view_name": "LayoutView",
4427
            "align_content": null,
4428
            "align_items": null,
4429
            "align_self": null,
4430
            "border": null,
4431
            "bottom": null,
4432
            "display": null,
4433
            "flex": null,
4434
            "flex_flow": null,
4435
            "grid_area": null,
4436
            "grid_auto_columns": null,
4437
            "grid_auto_flow": null,
4438
            "grid_auto_rows": null,
4439
            "grid_column": null,
4440
            "grid_gap": null,
4441
            "grid_row": null,
4442
            "grid_template_areas": null,
4443
            "grid_template_columns": null,
4444
            "grid_template_rows": null,
4445
            "height": null,
4446
            "justify_content": null,
4447
            "justify_items": null,
4448
            "left": null,
4449
            "margin": null,
4450
            "max_height": null,
4451
            "max_width": null,
4452
            "min_height": null,
4453
            "min_width": null,
4454
            "object_fit": null,
4455
            "object_position": null,
4456
            "order": null,
4457
            "overflow": null,
4458
            "overflow_x": null,
4459
            "overflow_y": null,
4460
            "padding": null,
4461
            "right": null,
4462
            "top": null,
4463
            "visibility": null,
4464
            "width": null
4465
          }
4466
        },
4467
        "f4642fc990e94bf99c7d26017d8771ba": {
4468
          "model_module": "@jupyter-widgets/controls",
4469
          "model_name": "DescriptionStyleModel",
4470
          "model_module_version": "1.5.0",
4471
          "state": {
4472
            "_model_module": "@jupyter-widgets/controls",
4473
            "_model_module_version": "1.5.0",
4474
            "_model_name": "DescriptionStyleModel",
4475
            "_view_count": null,
4476
            "_view_module": "@jupyter-widgets/base",
4477
            "_view_module_version": "1.2.0",
4478
            "_view_name": "StyleView",
4479
            "description_width": ""
4480
          }
4481
        },
4482
        "9c49b620b2c04b83ae01bf4ab06e7270": {
4483
          "model_module": "@jupyter-widgets/controls",
4484
          "model_name": "LabelModel",
4485
          "model_module_version": "1.5.0",
4486
          "state": {
4487
            "_dom_classes": [],
4488
            "_model_module": "@jupyter-widgets/controls",
4489
            "_model_module_version": "1.5.0",
4490
            "_model_name": "LabelModel",
4491
            "_view_count": null,
4492
            "_view_module": "@jupyter-widgets/controls",
4493
            "_view_module_version": "1.5.0",
4494
            "_view_name": "LabelView",
4495
            "description": "",
4496
            "description_tooltip": null,
4497
            "layout": "IPY_MODEL_d47d0c2d528c40a38965fac763e450cf",
4498
            "placeholder": "​",
4499
            "style": "IPY_MODEL_374d4f7c917d48c2b81f24cbfcfce062",
4500
            "value": "Connecting..."
4501
          }
4502
        },
4503
        "d47d0c2d528c40a38965fac763e450cf": {
4504
          "model_module": "@jupyter-widgets/base",
4505
          "model_name": "LayoutModel",
4506
          "model_module_version": "1.2.0",
4507
          "state": {
4508
            "_model_module": "@jupyter-widgets/base",
4509
            "_model_module_version": "1.2.0",
4510
            "_model_name": "LayoutModel",
4511
            "_view_count": null,
4512
            "_view_module": "@jupyter-widgets/base",
4513
            "_view_module_version": "1.2.0",
4514
            "_view_name": "LayoutView",
4515
            "align_content": null,
4516
            "align_items": null,
4517
            "align_self": null,
4518
            "border": null,
4519
            "bottom": null,
4520
            "display": null,
4521
            "flex": null,
4522
            "flex_flow": null,
4523
            "grid_area": null,
4524
            "grid_auto_columns": null,
4525
            "grid_auto_flow": null,
4526
            "grid_auto_rows": null,
4527
            "grid_column": null,
4528
            "grid_gap": null,
4529
            "grid_row": null,
4530
            "grid_template_areas": null,
4531
            "grid_template_columns": null,
4532
            "grid_template_rows": null,
4533
            "height": null,
4534
            "justify_content": null,
4535
            "justify_items": null,
4536
            "left": null,
4537
            "margin": null,
4538
            "max_height": null,
4539
            "max_width": null,
4540
            "min_height": null,
4541
            "min_width": null,
4542
            "object_fit": null,
4543
            "object_position": null,
4544
            "order": null,
4545
            "overflow": null,
4546
            "overflow_x": null,
4547
            "overflow_y": null,
4548
            "padding": null,
4549
            "right": null,
4550
            "top": null,
4551
            "visibility": null,
4552
            "width": null
4553
          }
4554
        },
4555
        "374d4f7c917d48c2b81f24cbfcfce062": {
4556
          "model_module": "@jupyter-widgets/controls",
4557
          "model_name": "DescriptionStyleModel",
4558
          "model_module_version": "1.5.0",
4559
          "state": {
4560
            "_model_module": "@jupyter-widgets/controls",
4561
            "_model_module_version": "1.5.0",
4562
            "_model_name": "DescriptionStyleModel",
4563
            "_view_count": null,
4564
            "_view_module": "@jupyter-widgets/base",
4565
            "_view_module_version": "1.2.0",
4566
            "_view_name": "StyleView",
4567
            "description_width": ""
4568
          }
4569
        },
4570
        "9f151f163b2c4966b9460dd31e184a84": {
4571
          "model_module": "@jupyter-widgets/controls",
4572
          "model_name": "LabelModel",
4573
          "model_module_version": "1.5.0",
4574
          "state": {
4575
            "_dom_classes": [],
4576
            "_model_module": "@jupyter-widgets/controls",
4577
            "_model_module_version": "1.5.0",
4578
            "_model_name": "LabelModel",
4579
            "_view_count": null,
4580
            "_view_module": "@jupyter-widgets/controls",
4581
            "_view_module_version": "1.5.0",
4582
            "_view_name": "LabelView",
4583
            "description": "",
4584
            "description_tooltip": null,
4585
            "layout": "IPY_MODEL_dd0df631524e42e48299283f1a9555e9",
4586
            "placeholder": "​",
4587
            "style": "IPY_MODEL_609d5b49e085499994a11342e8dd425f",
4588
            "value": "Token is valid (permission: write)."
4589
          }
4590
        },
4591
        "67a9e41b56a741baa4a953905d11c263": {
4592
          "model_module": "@jupyter-widgets/controls",
4593
          "model_name": "LabelModel",
4594
          "model_module_version": "1.5.0",
4595
          "state": {
4596
            "_dom_classes": [],
4597
            "_model_module": "@jupyter-widgets/controls",
4598
            "_model_module_version": "1.5.0",
4599
            "_model_name": "LabelModel",
4600
            "_view_count": null,
4601
            "_view_module": "@jupyter-widgets/controls",
4602
            "_view_module_version": "1.5.0",
4603
            "_view_name": "LabelView",
4604
            "description": "",
4605
            "description_tooltip": null,
4606
            "layout": "IPY_MODEL_3e7792c5d2484e24950f1f83f899ba4a",
4607
            "placeholder": "​",
4608
            "style": "IPY_MODEL_8fae6bd7ebc54636a14b375fa46ec0dc",
4609
            "value": "Your token has been saved in your configured git credential helpers (store)."
4610
          }
4611
        },
4612
        "e32ad767760d4bddb2f3a0a38327339b": {
4613
          "model_module": "@jupyter-widgets/controls",
4614
          "model_name": "LabelModel",
4615
          "model_module_version": "1.5.0",
4616
          "state": {
4617
            "_dom_classes": [],
4618
            "_model_module": "@jupyter-widgets/controls",
4619
            "_model_module_version": "1.5.0",
4620
            "_model_name": "LabelModel",
4621
            "_view_count": null,
4622
            "_view_module": "@jupyter-widgets/controls",
4623
            "_view_module_version": "1.5.0",
4624
            "_view_name": "LabelView",
4625
            "description": "",
4626
            "description_tooltip": null,
4627
            "layout": "IPY_MODEL_7fa56a404f8847239109441a3b043a77",
4628
            "placeholder": "​",
4629
            "style": "IPY_MODEL_d34e4794cb944c1cb8ae88845cad54fb",
4630
            "value": "Your token has been saved to /root/.cache/huggingface/token"
4631
          }
4632
        },
4633
        "681dac30b90b4b02b38f9e5bb99429c0": {
4634
          "model_module": "@jupyter-widgets/controls",
4635
          "model_name": "LabelModel",
4636
          "model_module_version": "1.5.0",
4637
          "state": {
4638
            "_dom_classes": [],
4639
            "_model_module": "@jupyter-widgets/controls",
4640
            "_model_module_version": "1.5.0",
4641
            "_model_name": "LabelModel",
4642
            "_view_count": null,
4643
            "_view_module": "@jupyter-widgets/controls",
4644
            "_view_module_version": "1.5.0",
4645
            "_view_name": "LabelView",
4646
            "description": "",
4647
            "description_tooltip": null,
4648
            "layout": "IPY_MODEL_4856843799e242ac8b72cf37ad1a159f",
4649
            "placeholder": "​",
4650
            "style": "IPY_MODEL_c3c6d1e8aeb14e999ea3fadc10539861",
4651
            "value": "Login successful"
4652
          }
4653
        },
4654
        "dd0df631524e42e48299283f1a9555e9": {
4655
          "model_module": "@jupyter-widgets/base",
4656
          "model_name": "LayoutModel",
4657
          "model_module_version": "1.2.0",
4658
          "state": {
4659
            "_model_module": "@jupyter-widgets/base",
4660
            "_model_module_version": "1.2.0",
4661
            "_model_name": "LayoutModel",
4662
            "_view_count": null,
4663
            "_view_module": "@jupyter-widgets/base",
4664
            "_view_module_version": "1.2.0",
4665
            "_view_name": "LayoutView",
4666
            "align_content": null,
4667
            "align_items": null,
4668
            "align_self": null,
4669
            "border": null,
4670
            "bottom": null,
4671
            "display": null,
4672
            "flex": null,
4673
            "flex_flow": null,
4674
            "grid_area": null,
4675
            "grid_auto_columns": null,
4676
            "grid_auto_flow": null,
4677
            "grid_auto_rows": null,
4678
            "grid_column": null,
4679
            "grid_gap": null,
4680
            "grid_row": null,
4681
            "grid_template_areas": null,
4682
            "grid_template_columns": null,
4683
            "grid_template_rows": null,
4684
            "height": null,
4685
            "justify_content": null,
4686
            "justify_items": null,
4687
            "left": null,
4688
            "margin": null,
4689
            "max_height": null,
4690
            "max_width": null,
4691
            "min_height": null,
4692
            "min_width": null,
4693
            "object_fit": null,
4694
            "object_position": null,
4695
            "order": null,
4696
            "overflow": null,
4697
            "overflow_x": null,
4698
            "overflow_y": null,
4699
            "padding": null,
4700
            "right": null,
4701
            "top": null,
4702
            "visibility": null,
4703
            "width": null
4704
          }
4705
        },
4706
        "609d5b49e085499994a11342e8dd425f": {
4707
          "model_module": "@jupyter-widgets/controls",
4708
          "model_name": "DescriptionStyleModel",
4709
          "model_module_version": "1.5.0",
4710
          "state": {
4711
            "_model_module": "@jupyter-widgets/controls",
4712
            "_model_module_version": "1.5.0",
4713
            "_model_name": "DescriptionStyleModel",
4714
            "_view_count": null,
4715
            "_view_module": "@jupyter-widgets/base",
4716
            "_view_module_version": "1.2.0",
4717
            "_view_name": "StyleView",
4718
            "description_width": ""
4719
          }
4720
        },
4721
        "3e7792c5d2484e24950f1f83f899ba4a": {
4722
          "model_module": "@jupyter-widgets/base",
4723
          "model_name": "LayoutModel",
4724
          "model_module_version": "1.2.0",
4725
          "state": {
4726
            "_model_module": "@jupyter-widgets/base",
4727
            "_model_module_version": "1.2.0",
4728
            "_model_name": "LayoutModel",
4729
            "_view_count": null,
4730
            "_view_module": "@jupyter-widgets/base",
4731
            "_view_module_version": "1.2.0",
4732
            "_view_name": "LayoutView",
4733
            "align_content": null,
4734
            "align_items": null,
4735
            "align_self": null,
4736
            "border": null,
4737
            "bottom": null,
4738
            "display": null,
4739
            "flex": null,
4740
            "flex_flow": null,
4741
            "grid_area": null,
4742
            "grid_auto_columns": null,
4743
            "grid_auto_flow": null,
4744
            "grid_auto_rows": null,
4745
            "grid_column": null,
4746
            "grid_gap": null,
4747
            "grid_row": null,
4748
            "grid_template_areas": null,
4749
            "grid_template_columns": null,
4750
            "grid_template_rows": null,
4751
            "height": null,
4752
            "justify_content": null,
4753
            "justify_items": null,
4754
            "left": null,
4755
            "margin": null,
4756
            "max_height": null,
4757
            "max_width": null,
4758
            "min_height": null,
4759
            "min_width": null,
4760
            "object_fit": null,
4761
            "object_position": null,
4762
            "order": null,
4763
            "overflow": null,
4764
            "overflow_x": null,
4765
            "overflow_y": null,
4766
            "padding": null,
4767
            "right": null,
4768
            "top": null,
4769
            "visibility": null,
4770
            "width": null
4771
          }
4772
        },
4773
        "8fae6bd7ebc54636a14b375fa46ec0dc": {
4774
          "model_module": "@jupyter-widgets/controls",
4775
          "model_name": "DescriptionStyleModel",
4776
          "model_module_version": "1.5.0",
4777
          "state": {
4778
            "_model_module": "@jupyter-widgets/controls",
4779
            "_model_module_version": "1.5.0",
4780
            "_model_name": "DescriptionStyleModel",
4781
            "_view_count": null,
4782
            "_view_module": "@jupyter-widgets/base",
4783
            "_view_module_version": "1.2.0",
4784
            "_view_name": "StyleView",
4785
            "description_width": ""
4786
          }
4787
        },
4788
        "7fa56a404f8847239109441a3b043a77": {
4789
          "model_module": "@jupyter-widgets/base",
4790
          "model_name": "LayoutModel",
4791
          "model_module_version": "1.2.0",
4792
          "state": {
4793
            "_model_module": "@jupyter-widgets/base",
4794
            "_model_module_version": "1.2.0",
4795
            "_model_name": "LayoutModel",
4796
            "_view_count": null,
4797
            "_view_module": "@jupyter-widgets/base",
4798
            "_view_module_version": "1.2.0",
4799
            "_view_name": "LayoutView",
4800
            "align_content": null,
4801
            "align_items": null,
4802
            "align_self": null,
4803
            "border": null,
4804
            "bottom": null,
4805
            "display": null,
4806
            "flex": null,
4807
            "flex_flow": null,
4808
            "grid_area": null,
4809
            "grid_auto_columns": null,
4810
            "grid_auto_flow": null,
4811
            "grid_auto_rows": null,
4812
            "grid_column": null,
4813
            "grid_gap": null,
4814
            "grid_row": null,
4815
            "grid_template_areas": null,
4816
            "grid_template_columns": null,
4817
            "grid_template_rows": null,
4818
            "height": null,
4819
            "justify_content": null,
4820
            "justify_items": null,
4821
            "left": null,
4822
            "margin": null,
4823
            "max_height": null,
4824
            "max_width": null,
4825
            "min_height": null,
4826
            "min_width": null,
4827
            "object_fit": null,
4828
            "object_position": null,
4829
            "order": null,
4830
            "overflow": null,
4831
            "overflow_x": null,
4832
            "overflow_y": null,
4833
            "padding": null,
4834
            "right": null,
4835
            "top": null,
4836
            "visibility": null,
4837
            "width": null
4838
          }
4839
        },
4840
        "d34e4794cb944c1cb8ae88845cad54fb": {
4841
          "model_module": "@jupyter-widgets/controls",
4842
          "model_name": "DescriptionStyleModel",
4843
          "model_module_version": "1.5.0",
4844
          "state": {
4845
            "_model_module": "@jupyter-widgets/controls",
4846
            "_model_module_version": "1.5.0",
4847
            "_model_name": "DescriptionStyleModel",
4848
            "_view_count": null,
4849
            "_view_module": "@jupyter-widgets/base",
4850
            "_view_module_version": "1.2.0",
4851
            "_view_name": "StyleView",
4852
            "description_width": ""
4853
          }
4854
        },
4855
        "4856843799e242ac8b72cf37ad1a159f": {
4856
          "model_module": "@jupyter-widgets/base",
4857
          "model_name": "LayoutModel",
4858
          "model_module_version": "1.2.0",
4859
          "state": {
4860
            "_model_module": "@jupyter-widgets/base",
4861
            "_model_module_version": "1.2.0",
4862
            "_model_name": "LayoutModel",
4863
            "_view_count": null,
4864
            "_view_module": "@jupyter-widgets/base",
4865
            "_view_module_version": "1.2.0",
4866
            "_view_name": "LayoutView",
4867
            "align_content": null,
4868
            "align_items": null,
4869
            "align_self": null,
4870
            "border": null,
4871
            "bottom": null,
4872
            "display": null,
4873
            "flex": null,
4874
            "flex_flow": null,
4875
            "grid_area": null,
4876
            "grid_auto_columns": null,
4877
            "grid_auto_flow": null,
4878
            "grid_auto_rows": null,
4879
            "grid_column": null,
4880
            "grid_gap": null,
4881
            "grid_row": null,
4882
            "grid_template_areas": null,
4883
            "grid_template_columns": null,
4884
            "grid_template_rows": null,
4885
            "height": null,
4886
            "justify_content": null,
4887
            "justify_items": null,
4888
            "left": null,
4889
            "margin": null,
4890
            "max_height": null,
4891
            "max_width": null,
4892
            "min_height": null,
4893
            "min_width": null,
4894
            "object_fit": null,
4895
            "object_position": null,
4896
            "order": null,
4897
            "overflow": null,
4898
            "overflow_x": null,
4899
            "overflow_y": null,
4900
            "padding": null,
4901
            "right": null,
4902
            "top": null,
4903
            "visibility": null,
4904
            "width": null
4905
          }
4906
        },
4907
        "c3c6d1e8aeb14e999ea3fadc10539861": {
4908
          "model_module": "@jupyter-widgets/controls",
4909
          "model_name": "DescriptionStyleModel",
4910
          "model_module_version": "1.5.0",
4911
          "state": {
4912
            "_model_module": "@jupyter-widgets/controls",
4913
            "_model_module_version": "1.5.0",
4914
            "_model_name": "DescriptionStyleModel",
4915
            "_view_count": null,
4916
            "_view_module": "@jupyter-widgets/base",
4917
            "_view_module_version": "1.2.0",
4918
            "_view_name": "StyleView",
4919
            "description_width": ""
4920
          }
4921
        },
4922
        "e1b0ea4e59bd4bebb31544a4ca43fae6": {
4923
          "model_module": "@jupyter-widgets/controls",
4924
          "model_name": "HBoxModel",
4925
          "model_module_version": "1.5.0",
4926
          "state": {
4927
            "_dom_classes": [],
4928
            "_model_module": "@jupyter-widgets/controls",
4929
            "_model_module_version": "1.5.0",
4930
            "_model_name": "HBoxModel",
4931
            "_view_count": null,
4932
            "_view_module": "@jupyter-widgets/controls",
4933
            "_view_module_version": "1.5.0",
4934
            "_view_name": "HBoxView",
4935
            "box_style": "",
4936
            "children": [
4937
              "IPY_MODEL_bb7c0220e2404676a2a37b5f8e45a560",
4938
              "IPY_MODEL_d9a38641722d4d1698c9eb12c9ba2aa2",
4939
              "IPY_MODEL_726b3712020e475e8e5734e51ad3255b"
4940
            ],
4941
            "layout": "IPY_MODEL_b834f5ce5d4b4e8eac3688c2119e0e6e"
4942
          }
4943
        },
4944
        "bb7c0220e2404676a2a37b5f8e45a560": {
4945
          "model_module": "@jupyter-widgets/controls",
4946
          "model_name": "HTMLModel",
4947
          "model_module_version": "1.5.0",
4948
          "state": {
4949
            "_dom_classes": [],
4950
            "_model_module": "@jupyter-widgets/controls",
4951
            "_model_module_version": "1.5.0",
4952
            "_model_name": "HTMLModel",
4953
            "_view_count": null,
4954
            "_view_module": "@jupyter-widgets/controls",
4955
            "_view_module_version": "1.5.0",
4956
            "_view_name": "HTMLView",
4957
            "description": "",
4958
            "description_tooltip": null,
4959
            "layout": "IPY_MODEL_99fea94eaf344a1687764756225e9a88",
4960
            "placeholder": "​",
4961
            "style": "IPY_MODEL_640023c221074c73a38d147a05cfedaf",
4962
            "value": "Loading checkpoint shards: 100%"
4963
          }
4964
        },
4965
        "d9a38641722d4d1698c9eb12c9ba2aa2": {
4966
          "model_module": "@jupyter-widgets/controls",
4967
          "model_name": "FloatProgressModel",
4968
          "model_module_version": "1.5.0",
4969
          "state": {
4970
            "_dom_classes": [],
4971
            "_model_module": "@jupyter-widgets/controls",
4972
            "_model_module_version": "1.5.0",
4973
            "_model_name": "FloatProgressModel",
4974
            "_view_count": null,
4975
            "_view_module": "@jupyter-widgets/controls",
4976
            "_view_module_version": "1.5.0",
4977
            "_view_name": "ProgressView",
4978
            "bar_style": "success",
4979
            "description": "",
4980
            "description_tooltip": null,
4981
            "layout": "IPY_MODEL_e2a1e70aa375450299d81baa7a007f1f",
4982
            "max": 2,
4983
            "min": 0,
4984
            "orientation": "horizontal",
4985
            "style": "IPY_MODEL_4f06ce02f7b8452981731c87ac3814bc",
4986
            "value": 2
4987
          }
4988
        },
4989
        "726b3712020e475e8e5734e51ad3255b": {
4990
          "model_module": "@jupyter-widgets/controls",
4991
          "model_name": "HTMLModel",
4992
          "model_module_version": "1.5.0",
4993
          "state": {
4994
            "_dom_classes": [],
4995
            "_model_module": "@jupyter-widgets/controls",
4996
            "_model_module_version": "1.5.0",
4997
            "_model_name": "HTMLModel",
4998
            "_view_count": null,
4999
            "_view_module": "@jupyter-widgets/controls",
5000
            "_view_module_version": "1.5.0",
5001
            "_view_name": "HTMLView",
5002
            "description": "",
5003
            "description_tooltip": null,
5004
            "layout": "IPY_MODEL_ed7294f2acf84015ac0c0c3b78c629cb",
5005
            "placeholder": "​",
5006
            "style": "IPY_MODEL_1cf1101362c545738dcbb5e528f1af33",
5007
            "value": " 2/2 [01:23&lt;00:00, 38.53s/it]"
5008
          }
5009
        },
5010
        "b834f5ce5d4b4e8eac3688c2119e0e6e": {
5011
          "model_module": "@jupyter-widgets/base",
5012
          "model_name": "LayoutModel",
5013
          "model_module_version": "1.2.0",
5014
          "state": {
5015
            "_model_module": "@jupyter-widgets/base",
5016
            "_model_module_version": "1.2.0",
5017
            "_model_name": "LayoutModel",
5018
            "_view_count": null,
5019
            "_view_module": "@jupyter-widgets/base",
5020
            "_view_module_version": "1.2.0",
5021
            "_view_name": "LayoutView",
5022
            "align_content": null,
5023
            "align_items": null,
5024
            "align_self": null,
5025
            "border": null,
5026
            "bottom": null,
5027
            "display": null,
5028
            "flex": null,
5029
            "flex_flow": null,
5030
            "grid_area": null,
5031
            "grid_auto_columns": null,
5032
            "grid_auto_flow": null,
5033
            "grid_auto_rows": null,
5034
            "grid_column": null,
5035
            "grid_gap": null,
5036
            "grid_row": null,
5037
            "grid_template_areas": null,
5038
            "grid_template_columns": null,
5039
            "grid_template_rows": null,
5040
            "height": null,
5041
            "justify_content": null,
5042
            "justify_items": null,
5043
            "left": null,
5044
            "margin": null,
5045
            "max_height": null,
5046
            "max_width": null,
5047
            "min_height": null,
5048
            "min_width": null,
5049
            "object_fit": null,
5050
            "object_position": null,
5051
            "order": null,
5052
            "overflow": null,
5053
            "overflow_x": null,
5054
            "overflow_y": null,
5055
            "padding": null,
5056
            "right": null,
5057
            "top": null,
5058
            "visibility": null,
5059
            "width": null
5060
          }
5061
        },
5062
        "99fea94eaf344a1687764756225e9a88": {
5063
          "model_module": "@jupyter-widgets/base",
5064
          "model_name": "LayoutModel",
5065
          "model_module_version": "1.2.0",
5066
          "state": {
5067
            "_model_module": "@jupyter-widgets/base",
5068
            "_model_module_version": "1.2.0",
5069
            "_model_name": "LayoutModel",
5070
            "_view_count": null,
5071
            "_view_module": "@jupyter-widgets/base",
5072
            "_view_module_version": "1.2.0",
5073
            "_view_name": "LayoutView",
5074
            "align_content": null,
5075
            "align_items": null,
5076
            "align_self": null,
5077
            "border": null,
5078
            "bottom": null,
5079
            "display": null,
5080
            "flex": null,
5081
            "flex_flow": null,
5082
            "grid_area": null,
5083
            "grid_auto_columns": null,
5084
            "grid_auto_flow": null,
5085
            "grid_auto_rows": null,
5086
            "grid_column": null,
5087
            "grid_gap": null,
5088
            "grid_row": null,
5089
            "grid_template_areas": null,
5090
            "grid_template_columns": null,
5091
            "grid_template_rows": null,
5092
            "height": null,
5093
            "justify_content": null,
5094
            "justify_items": null,
5095
            "left": null,
5096
            "margin": null,
5097
            "max_height": null,
5098
            "max_width": null,
5099
            "min_height": null,
5100
            "min_width": null,
5101
            "object_fit": null,
5102
            "object_position": null,
5103
            "order": null,
5104
            "overflow": null,
5105
            "overflow_x": null,
5106
            "overflow_y": null,
5107
            "padding": null,
5108
            "right": null,
5109
            "top": null,
5110
            "visibility": null,
5111
            "width": null
5112
          }
5113
        },
5114
        "640023c221074c73a38d147a05cfedaf": {
5115
          "model_module": "@jupyter-widgets/controls",
5116
          "model_name": "DescriptionStyleModel",
5117
          "model_module_version": "1.5.0",
5118
          "state": {
5119
            "_model_module": "@jupyter-widgets/controls",
5120
            "_model_module_version": "1.5.0",
5121
            "_model_name": "DescriptionStyleModel",
5122
            "_view_count": null,
5123
            "_view_module": "@jupyter-widgets/base",
5124
            "_view_module_version": "1.2.0",
5125
            "_view_name": "StyleView",
5126
            "description_width": ""
5127
          }
5128
        },
5129
        "e2a1e70aa375450299d81baa7a007f1f": {
5130
          "model_module": "@jupyter-widgets/base",
5131
          "model_name": "LayoutModel",
5132
          "model_module_version": "1.2.0",
5133
          "state": {
5134
            "_model_module": "@jupyter-widgets/base",
5135
            "_model_module_version": "1.2.0",
5136
            "_model_name": "LayoutModel",
5137
            "_view_count": null,
5138
            "_view_module": "@jupyter-widgets/base",
5139
            "_view_module_version": "1.2.0",
5140
            "_view_name": "LayoutView",
5141
            "align_content": null,
5142
            "align_items": null,
5143
            "align_self": null,
5144
            "border": null,
5145
            "bottom": null,
5146
            "display": null,
5147
            "flex": null,
5148
            "flex_flow": null,
5149
            "grid_area": null,
5150
            "grid_auto_columns": null,
5151
            "grid_auto_flow": null,
5152
            "grid_auto_rows": null,
5153
            "grid_column": null,
5154
            "grid_gap": null,
5155
            "grid_row": null,
5156
            "grid_template_areas": null,
5157
            "grid_template_columns": null,
5158
            "grid_template_rows": null,
5159
            "height": null,
5160
            "justify_content": null,
5161
            "justify_items": null,
5162
            "left": null,
5163
            "margin": null,
5164
            "max_height": null,
5165
            "max_width": null,
5166
            "min_height": null,
5167
            "min_width": null,
5168
            "object_fit": null,
5169
            "object_position": null,
5170
            "order": null,
5171
            "overflow": null,
5172
            "overflow_x": null,
5173
            "overflow_y": null,
5174
            "padding": null,
5175
            "right": null,
5176
            "top": null,
5177
            "visibility": null,
5178
            "width": null
5179
          }
5180
        },
5181
        "4f06ce02f7b8452981731c87ac3814bc": {
5182
          "model_module": "@jupyter-widgets/controls",
5183
          "model_name": "ProgressStyleModel",
5184
          "model_module_version": "1.5.0",
5185
          "state": {
5186
            "_model_module": "@jupyter-widgets/controls",
5187
            "_model_module_version": "1.5.0",
5188
            "_model_name": "ProgressStyleModel",
5189
            "_view_count": null,
5190
            "_view_module": "@jupyter-widgets/base",
5191
            "_view_module_version": "1.2.0",
5192
            "_view_name": "StyleView",
5193
            "bar_color": null,
5194
            "description_width": ""
5195
          }
5196
        },
5197
        "ed7294f2acf84015ac0c0c3b78c629cb": {
5198
          "model_module": "@jupyter-widgets/base",
5199
          "model_name": "LayoutModel",
5200
          "model_module_version": "1.2.0",
5201
          "state": {
5202
            "_model_module": "@jupyter-widgets/base",
5203
            "_model_module_version": "1.2.0",
5204
            "_model_name": "LayoutModel",
5205
            "_view_count": null,
5206
            "_view_module": "@jupyter-widgets/base",
5207
            "_view_module_version": "1.2.0",
5208
            "_view_name": "LayoutView",
5209
            "align_content": null,
5210
            "align_items": null,
5211
            "align_self": null,
5212
            "border": null,
5213
            "bottom": null,
5214
            "display": null,
5215
            "flex": null,
5216
            "flex_flow": null,
5217
            "grid_area": null,
5218
            "grid_auto_columns": null,
5219
            "grid_auto_flow": null,
5220
            "grid_auto_rows": null,
5221
            "grid_column": null,
5222
            "grid_gap": null,
5223
            "grid_row": null,
5224
            "grid_template_areas": null,
5225
            "grid_template_columns": null,
5226
            "grid_template_rows": null,
5227
            "height": null,
5228
            "justify_content": null,
5229
            "justify_items": null,
5230
            "left": null,
5231
            "margin": null,
5232
            "max_height": null,
5233
            "max_width": null,
5234
            "min_height": null,
5235
            "min_width": null,
5236
            "object_fit": null,
5237
            "object_position": null,
5238
            "order": null,
5239
            "overflow": null,
5240
            "overflow_x": null,
5241
            "overflow_y": null,
5242
            "padding": null,
5243
            "right": null,
5244
            "top": null,
5245
            "visibility": null,
5246
            "width": null
5247
          }
5248
        },
5249
        "1cf1101362c545738dcbb5e528f1af33": {
5250
          "model_module": "@jupyter-widgets/controls",
5251
          "model_name": "DescriptionStyleModel",
5252
          "model_module_version": "1.5.0",
5253
          "state": {
5254
            "_model_module": "@jupyter-widgets/controls",
5255
            "_model_module_version": "1.5.0",
5256
            "_model_name": "DescriptionStyleModel",
5257
            "_view_count": null,
5258
            "_view_module": "@jupyter-widgets/base",
5259
            "_view_module_version": "1.2.0",
5260
            "_view_name": "StyleView",
5261
            "description_width": ""
5262
          }
5263
        },
5264
        "c8846cf2991a4e93a0e91bc1802f1079": {
5265
          "model_module": "@jupyter-widgets/controls",
5266
          "model_name": "HBoxModel",
5267
          "model_module_version": "1.5.0",
5268
          "state": {
5269
            "_dom_classes": [],
5270
            "_model_module": "@jupyter-widgets/controls",
5271
            "_model_module_version": "1.5.0",
5272
            "_model_name": "HBoxModel",
5273
            "_view_count": null,
5274
            "_view_module": "@jupyter-widgets/controls",
5275
            "_view_module_version": "1.5.0",
5276
            "_view_name": "HBoxView",
5277
            "box_style": "",
5278
            "children": [
5279
              "IPY_MODEL_3b12538b8e254b4ebcc79d694b2b272c",
5280
              "IPY_MODEL_205f861cb2ed48209b795217f645e077",
5281
              "IPY_MODEL_25605c2b3e93464daedc1fcade1f47b4"
5282
            ],
5283
            "layout": "IPY_MODEL_98919ba0701545fd997e3d71f4eff69e"
5284
          }
5285
        },
5286
        "3b12538b8e254b4ebcc79d694b2b272c": {
5287
          "model_module": "@jupyter-widgets/controls",
5288
          "model_name": "HTMLModel",
5289
          "model_module_version": "1.5.0",
5290
          "state": {
5291
            "_dom_classes": [],
5292
            "_model_module": "@jupyter-widgets/controls",
5293
            "_model_module_version": "1.5.0",
5294
            "_model_name": "HTMLModel",
5295
            "_view_count": null,
5296
            "_view_module": "@jupyter-widgets/controls",
5297
            "_view_module_version": "1.5.0",
5298
            "_view_name": "HTMLView",
5299
            "description": "",
5300
            "description_tooltip": null,
5301
            "layout": "IPY_MODEL_013efa5f201d460e9332bd047982c449",
5302
            "placeholder": "​",
5303
            "style": "IPY_MODEL_7a0323004bd14c579c27df13bf3f861b",
5304
            "value": "tokenizer_config.json: 100%"
5305
          }
5306
        },
5307
        "205f861cb2ed48209b795217f645e077": {
5308
          "model_module": "@jupyter-widgets/controls",
5309
          "model_name": "FloatProgressModel",
5310
          "model_module_version": "1.5.0",
5311
          "state": {
5312
            "_dom_classes": [],
5313
            "_model_module": "@jupyter-widgets/controls",
5314
            "_model_module_version": "1.5.0",
5315
            "_model_name": "FloatProgressModel",
5316
            "_view_count": null,
5317
            "_view_module": "@jupyter-widgets/controls",
5318
            "_view_module_version": "1.5.0",
5319
            "_view_name": "ProgressView",
5320
            "bar_style": "success",
5321
            "description": "",
5322
            "description_tooltip": null,
5323
            "layout": "IPY_MODEL_ef49e0a4613f4207b11012c494b4ed15",
5324
            "max": 25,
5325
            "min": 0,
5326
            "orientation": "horizontal",
5327
            "style": "IPY_MODEL_f88c80704c59405f9224c9a98105303d",
5328
            "value": 25
5329
          }
5330
        },
5331
        "25605c2b3e93464daedc1fcade1f47b4": {
5332
          "model_module": "@jupyter-widgets/controls",
5333
          "model_name": "HTMLModel",
5334
          "model_module_version": "1.5.0",
5335
          "state": {
5336
            "_dom_classes": [],
5337
            "_model_module": "@jupyter-widgets/controls",
5338
            "_model_module_version": "1.5.0",
5339
            "_model_name": "HTMLModel",
5340
            "_view_count": null,
5341
            "_view_module": "@jupyter-widgets/controls",
5342
            "_view_module_version": "1.5.0",
5343
            "_view_name": "HTMLView",
5344
            "description": "",
5345
            "description_tooltip": null,
5346
            "layout": "IPY_MODEL_1a4eab919e9e475ea452687215355489",
5347
            "placeholder": "​",
5348
            "style": "IPY_MODEL_cf5d7616d08944108ae6dcfdb487cd5a",
5349
            "value": " 25.0/25.0 [00:00&lt;00:00, 1.87kB/s]"
5350
          }
5351
        },
5352
        "98919ba0701545fd997e3d71f4eff69e": {
5353
          "model_module": "@jupyter-widgets/base",
5354
          "model_name": "LayoutModel",
5355
          "model_module_version": "1.2.0",
5356
          "state": {
5357
            "_model_module": "@jupyter-widgets/base",
5358
            "_model_module_version": "1.2.0",
5359
            "_model_name": "LayoutModel",
5360
            "_view_count": null,
5361
            "_view_module": "@jupyter-widgets/base",
5362
            "_view_module_version": "1.2.0",
5363
            "_view_name": "LayoutView",
5364
            "align_content": null,
5365
            "align_items": null,
5366
            "align_self": null,
5367
            "border": null,
5368
            "bottom": null,
5369
            "display": null,
5370
            "flex": null,
5371
            "flex_flow": null,
5372
            "grid_area": null,
5373
            "grid_auto_columns": null,
5374
            "grid_auto_flow": null,
5375
            "grid_auto_rows": null,
5376
            "grid_column": null,
5377
            "grid_gap": null,
5378
            "grid_row": null,
5379
            "grid_template_areas": null,
5380
            "grid_template_columns": null,
5381
            "grid_template_rows": null,
5382
            "height": null,
5383
            "justify_content": null,
5384
            "justify_items": null,
5385
            "left": null,
5386
            "margin": null,
5387
            "max_height": null,
5388
            "max_width": null,
5389
            "min_height": null,
5390
            "min_width": null,
5391
            "object_fit": null,
5392
            "object_position": null,
5393
            "order": null,
5394
            "overflow": null,
5395
            "overflow_x": null,
5396
            "overflow_y": null,
5397
            "padding": null,
5398
            "right": null,
5399
            "top": null,
5400
            "visibility": null,
5401
            "width": null
5402
          }
5403
        },
5404
        "013efa5f201d460e9332bd047982c449": {
5405
          "model_module": "@jupyter-widgets/base",
5406
          "model_name": "LayoutModel",
5407
          "model_module_version": "1.2.0",
5408
          "state": {
5409
            "_model_module": "@jupyter-widgets/base",
5410
            "_model_module_version": "1.2.0",
5411
            "_model_name": "LayoutModel",
5412
            "_view_count": null,
5413
            "_view_module": "@jupyter-widgets/base",
5414
            "_view_module_version": "1.2.0",
5415
            "_view_name": "LayoutView",
5416
            "align_content": null,
5417
            "align_items": null,
5418
            "align_self": null,
5419
            "border": null,
5420
            "bottom": null,
5421
            "display": null,
5422
            "flex": null,
5423
            "flex_flow": null,
5424
            "grid_area": null,
5425
            "grid_auto_columns": null,
5426
            "grid_auto_flow": null,
5427
            "grid_auto_rows": null,
5428
            "grid_column": null,
5429
            "grid_gap": null,
5430
            "grid_row": null,
5431
            "grid_template_areas": null,
5432
            "grid_template_columns": null,
5433
            "grid_template_rows": null,
5434
            "height": null,
5435
            "justify_content": null,
5436
            "justify_items": null,
5437
            "left": null,
5438
            "margin": null,
5439
            "max_height": null,
5440
            "max_width": null,
5441
            "min_height": null,
5442
            "min_width": null,
5443
            "object_fit": null,
5444
            "object_position": null,
5445
            "order": null,
5446
            "overflow": null,
5447
            "overflow_x": null,
5448
            "overflow_y": null,
5449
            "padding": null,
5450
            "right": null,
5451
            "top": null,
5452
            "visibility": null,
5453
            "width": null
5454
          }
5455
        },
5456
        "7a0323004bd14c579c27df13bf3f861b": {
5457
          "model_module": "@jupyter-widgets/controls",
5458
          "model_name": "DescriptionStyleModel",
5459
          "model_module_version": "1.5.0",
5460
          "state": {
5461
            "_model_module": "@jupyter-widgets/controls",
5462
            "_model_module_version": "1.5.0",
5463
            "_model_name": "DescriptionStyleModel",
5464
            "_view_count": null,
5465
            "_view_module": "@jupyter-widgets/base",
5466
            "_view_module_version": "1.2.0",
5467
            "_view_name": "StyleView",
5468
            "description_width": ""
5469
          }
5470
        },
5471
        "ef49e0a4613f4207b11012c494b4ed15": {
5472
          "model_module": "@jupyter-widgets/base",
5473
          "model_name": "LayoutModel",
5474
          "model_module_version": "1.2.0",
5475
          "state": {
5476
            "_model_module": "@jupyter-widgets/base",
5477
            "_model_module_version": "1.2.0",
5478
            "_model_name": "LayoutModel",
5479
            "_view_count": null,
5480
            "_view_module": "@jupyter-widgets/base",
5481
            "_view_module_version": "1.2.0",
5482
            "_view_name": "LayoutView",
5483
            "align_content": null,
5484
            "align_items": null,
5485
            "align_self": null,
5486
            "border": null,
5487
            "bottom": null,
5488
            "display": null,
5489
            "flex": null,
5490
            "flex_flow": null,
5491
            "grid_area": null,
5492
            "grid_auto_columns": null,
5493
            "grid_auto_flow": null,
5494
            "grid_auto_rows": null,
5495
            "grid_column": null,
5496
            "grid_gap": null,
5497
            "grid_row": null,
5498
            "grid_template_areas": null,
5499
            "grid_template_columns": null,
5500
            "grid_template_rows": null,
5501
            "height": null,
5502
            "justify_content": null,
5503
            "justify_items": null,
5504
            "left": null,
5505
            "margin": null,
5506
            "max_height": null,
5507
            "max_width": null,
5508
            "min_height": null,
5509
            "min_width": null,
5510
            "object_fit": null,
5511
            "object_position": null,
5512
            "order": null,
5513
            "overflow": null,
5514
            "overflow_x": null,
5515
            "overflow_y": null,
5516
            "padding": null,
5517
            "right": null,
5518
            "top": null,
5519
            "visibility": null,
5520
            "width": null
5521
          }
5522
        },
5523
        "f88c80704c59405f9224c9a98105303d": {
5524
          "model_module": "@jupyter-widgets/controls",
5525
          "model_name": "ProgressStyleModel",
5526
          "model_module_version": "1.5.0",
5527
          "state": {
5528
            "_model_module": "@jupyter-widgets/controls",
5529
            "_model_module_version": "1.5.0",
5530
            "_model_name": "ProgressStyleModel",
5531
            "_view_count": null,
5532
            "_view_module": "@jupyter-widgets/base",
5533
            "_view_module_version": "1.2.0",
5534
            "_view_name": "StyleView",
5535
            "bar_color": null,
5536
            "description_width": ""
5537
          }
5538
        },
5539
        "1a4eab919e9e475ea452687215355489": {
5540
          "model_module": "@jupyter-widgets/base",
5541
          "model_name": "LayoutModel",
5542
          "model_module_version": "1.2.0",
5543
          "state": {
5544
            "_model_module": "@jupyter-widgets/base",
5545
            "_model_module_version": "1.2.0",
5546
            "_model_name": "LayoutModel",
5547
            "_view_count": null,
5548
            "_view_module": "@jupyter-widgets/base",
5549
            "_view_module_version": "1.2.0",
5550
            "_view_name": "LayoutView",
5551
            "align_content": null,
5552
            "align_items": null,
5553
            "align_self": null,
5554
            "border": null,
5555
            "bottom": null,
5556
            "display": null,
5557
            "flex": null,
5558
            "flex_flow": null,
5559
            "grid_area": null,
5560
            "grid_auto_columns": null,
5561
            "grid_auto_flow": null,
5562
            "grid_auto_rows": null,
5563
            "grid_column": null,
5564
            "grid_gap": null,
5565
            "grid_row": null,
5566
            "grid_template_areas": null,
5567
            "grid_template_columns": null,
5568
            "grid_template_rows": null,
5569
            "height": null,
5570
            "justify_content": null,
5571
            "justify_items": null,
5572
            "left": null,
5573
            "margin": null,
5574
            "max_height": null,
5575
            "max_width": null,
5576
            "min_height": null,
5577
            "min_width": null,
5578
            "object_fit": null,
5579
            "object_position": null,
5580
            "order": null,
5581
            "overflow": null,
5582
            "overflow_x": null,
5583
            "overflow_y": null,
5584
            "padding": null,
5585
            "right": null,
5586
            "top": null,
5587
            "visibility": null,
5588
            "width": null
5589
          }
5590
        },
5591
        "cf5d7616d08944108ae6dcfdb487cd5a": {
5592
          "model_module": "@jupyter-widgets/controls",
5593
          "model_name": "DescriptionStyleModel",
5594
          "model_module_version": "1.5.0",
5595
          "state": {
5596
            "_model_module": "@jupyter-widgets/controls",
5597
            "_model_module_version": "1.5.0",
5598
            "_model_name": "DescriptionStyleModel",
5599
            "_view_count": null,
5600
            "_view_module": "@jupyter-widgets/base",
5601
            "_view_module_version": "1.2.0",
5602
            "_view_name": "StyleView",
5603
            "description_width": ""
5604
          }
5605
        },
5606
        "ae392574043b40c8bf461468e8f06f18": {
5607
          "model_module": "@jupyter-widgets/controls",
5608
          "model_name": "HBoxModel",
5609
          "model_module_version": "1.5.0",
5610
          "state": {
5611
            "_dom_classes": [],
5612
            "_model_module": "@jupyter-widgets/controls",
5613
            "_model_module_version": "1.5.0",
5614
            "_model_name": "HBoxModel",
5615
            "_view_count": null,
5616
            "_view_module": "@jupyter-widgets/controls",
5617
            "_view_module_version": "1.5.0",
5618
            "_view_name": "HBoxView",
5619
            "box_style": "",
5620
            "children": [
5621
              "IPY_MODEL_8af0715bcf2647c390c073430ffbbe5d",
5622
              "IPY_MODEL_44e3af9f1124467b94cbb6f90f1669a3",
5623
              "IPY_MODEL_804dbc29e4a9404d935b8ab10dc481e9"
5624
            ],
5625
            "layout": "IPY_MODEL_6ed75fda4c3f4fb2ab931ee7e162b3c4"
5626
          }
5627
        },
5628
        "8af0715bcf2647c390c073430ffbbe5d": {
5629
          "model_module": "@jupyter-widgets/controls",
5630
          "model_name": "HTMLModel",
5631
          "model_module_version": "1.5.0",
5632
          "state": {
5633
            "_dom_classes": [],
5634
            "_model_module": "@jupyter-widgets/controls",
5635
            "_model_module_version": "1.5.0",
5636
            "_model_name": "HTMLModel",
5637
            "_view_count": null,
5638
            "_view_module": "@jupyter-widgets/controls",
5639
            "_view_module_version": "1.5.0",
5640
            "_view_name": "HTMLView",
5641
            "description": "",
5642
            "description_tooltip": null,
5643
            "layout": "IPY_MODEL_d71d92ee94994f93bbbdedb475dc5ba5",
5644
            "placeholder": "​",
5645
            "style": "IPY_MODEL_1780c6cc045a49a6838bc1e43c89dca1",
5646
            "value": "config.json: 100%"
5647
          }
5648
        },
5649
        "44e3af9f1124467b94cbb6f90f1669a3": {
5650
          "model_module": "@jupyter-widgets/controls",
5651
          "model_name": "FloatProgressModel",
5652
          "model_module_version": "1.5.0",
5653
          "state": {
5654
            "_dom_classes": [],
5655
            "_model_module": "@jupyter-widgets/controls",
5656
            "_model_module_version": "1.5.0",
5657
            "_model_name": "FloatProgressModel",
5658
            "_view_count": null,
5659
            "_view_module": "@jupyter-widgets/controls",
5660
            "_view_module_version": "1.5.0",
5661
            "_view_name": "ProgressView",
5662
            "bar_style": "success",
5663
            "description": "",
5664
            "description_tooltip": null,
5665
            "layout": "IPY_MODEL_51b7d216e4144dd0a5ea92eb12e410b7",
5666
            "max": 615,
5667
            "min": 0,
5668
            "orientation": "horizontal",
5669
            "style": "IPY_MODEL_6e2bf10b566a453d8b342e824ec36fdf",
5670
            "value": 615
5671
          }
5672
        },
5673
        "804dbc29e4a9404d935b8ab10dc481e9": {
5674
          "model_module": "@jupyter-widgets/controls",
5675
          "model_name": "HTMLModel",
5676
          "model_module_version": "1.5.0",
5677
          "state": {
5678
            "_dom_classes": [],
5679
            "_model_module": "@jupyter-widgets/controls",
5680
            "_model_module_version": "1.5.0",
5681
            "_model_name": "HTMLModel",
5682
            "_view_count": null,
5683
            "_view_module": "@jupyter-widgets/controls",
5684
            "_view_module_version": "1.5.0",
5685
            "_view_name": "HTMLView",
5686
            "description": "",
5687
            "description_tooltip": null,
5688
            "layout": "IPY_MODEL_1a7b5758d56d4811902698d1ad696163",
5689
            "placeholder": "​",
5690
            "style": "IPY_MODEL_11047f16bb534e70ad1d6bdde663cfa4",
5691
            "value": " 615/615 [00:00&lt;00:00, 24.8kB/s]"
5692
          }
5693
        },
5694
        "6ed75fda4c3f4fb2ab931ee7e162b3c4": {
5695
          "model_module": "@jupyter-widgets/base",
5696
          "model_name": "LayoutModel",
5697
          "model_module_version": "1.2.0",
5698
          "state": {
5699
            "_model_module": "@jupyter-widgets/base",
5700
            "_model_module_version": "1.2.0",
5701
            "_model_name": "LayoutModel",
5702
            "_view_count": null,
5703
            "_view_module": "@jupyter-widgets/base",
5704
            "_view_module_version": "1.2.0",
5705
            "_view_name": "LayoutView",
5706
            "align_content": null,
5707
            "align_items": null,
5708
            "align_self": null,
5709
            "border": null,
5710
            "bottom": null,
5711
            "display": null,
5712
            "flex": null,
5713
            "flex_flow": null,
5714
            "grid_area": null,
5715
            "grid_auto_columns": null,
5716
            "grid_auto_flow": null,
5717
            "grid_auto_rows": null,
5718
            "grid_column": null,
5719
            "grid_gap": null,
5720
            "grid_row": null,
5721
            "grid_template_areas": null,
5722
            "grid_template_columns": null,
5723
            "grid_template_rows": null,
5724
            "height": null,
5725
            "justify_content": null,
5726
            "justify_items": null,
5727
            "left": null,
5728
            "margin": null,
5729
            "max_height": null,
5730
            "max_width": null,
5731
            "min_height": null,
5732
            "min_width": null,
5733
            "object_fit": null,
5734
            "object_position": null,
5735
            "order": null,
5736
            "overflow": null,
5737
            "overflow_x": null,
5738
            "overflow_y": null,
5739
            "padding": null,
5740
            "right": null,
5741
            "top": null,
5742
            "visibility": null,
5743
            "width": null
5744
          }
5745
        },
5746
        "d71d92ee94994f93bbbdedb475dc5ba5": {
5747
          "model_module": "@jupyter-widgets/base",
5748
          "model_name": "LayoutModel",
5749
          "model_module_version": "1.2.0",
5750
          "state": {
5751
            "_model_module": "@jupyter-widgets/base",
5752
            "_model_module_version": "1.2.0",
5753
            "_model_name": "LayoutModel",
5754
            "_view_count": null,
5755
            "_view_module": "@jupyter-widgets/base",
5756
            "_view_module_version": "1.2.0",
5757
            "_view_name": "LayoutView",
5758
            "align_content": null,
5759
            "align_items": null,
5760
            "align_self": null,
5761
            "border": null,
5762
            "bottom": null,
5763
            "display": null,
5764
            "flex": null,
5765
            "flex_flow": null,
5766
            "grid_area": null,
5767
            "grid_auto_columns": null,
5768
            "grid_auto_flow": null,
5769
            "grid_auto_rows": null,
5770
            "grid_column": null,
5771
            "grid_gap": null,
5772
            "grid_row": null,
5773
            "grid_template_areas": null,
5774
            "grid_template_columns": null,
5775
            "grid_template_rows": null,
5776
            "height": null,
5777
            "justify_content": null,
5778
            "justify_items": null,
5779
            "left": null,
5780
            "margin": null,
5781
            "max_height": null,
5782
            "max_width": null,
5783
            "min_height": null,
5784
            "min_width": null,
5785
            "object_fit": null,
5786
            "object_position": null,
5787
            "order": null,
5788
            "overflow": null,
5789
            "overflow_x": null,
5790
            "overflow_y": null,
5791
            "padding": null,
5792
            "right": null,
5793
            "top": null,
5794
            "visibility": null,
5795
            "width": null
5796
          }
5797
        },
5798
        "1780c6cc045a49a6838bc1e43c89dca1": {
5799
          "model_module": "@jupyter-widgets/controls",
5800
          "model_name": "DescriptionStyleModel",
5801
          "model_module_version": "1.5.0",
5802
          "state": {
5803
            "_model_module": "@jupyter-widgets/controls",
5804
            "_model_module_version": "1.5.0",
5805
            "_model_name": "DescriptionStyleModel",
5806
            "_view_count": null,
5807
            "_view_module": "@jupyter-widgets/base",
5808
            "_view_module_version": "1.2.0",
5809
            "_view_name": "StyleView",
5810
            "description_width": ""
5811
          }
5812
        },
5813
        "51b7d216e4144dd0a5ea92eb12e410b7": {
5814
          "model_module": "@jupyter-widgets/base",
5815
          "model_name": "LayoutModel",
5816
          "model_module_version": "1.2.0",
5817
          "state": {
5818
            "_model_module": "@jupyter-widgets/base",
5819
            "_model_module_version": "1.2.0",
5820
            "_model_name": "LayoutModel",
5821
            "_view_count": null,
5822
            "_view_module": "@jupyter-widgets/base",
5823
            "_view_module_version": "1.2.0",
5824
            "_view_name": "LayoutView",
5825
            "align_content": null,
5826
            "align_items": null,
5827
            "align_self": null,
5828
            "border": null,
5829
            "bottom": null,
5830
            "display": null,
5831
            "flex": null,
5832
            "flex_flow": null,
5833
            "grid_area": null,
5834
            "grid_auto_columns": null,
5835
            "grid_auto_flow": null,
5836
            "grid_auto_rows": null,
5837
            "grid_column": null,
5838
            "grid_gap": null,
5839
            "grid_row": null,
5840
            "grid_template_areas": null,
5841
            "grid_template_columns": null,
5842
            "grid_template_rows": null,
5843
            "height": null,
5844
            "justify_content": null,
5845
            "justify_items": null,
5846
            "left": null,
5847
            "margin": null,
5848
            "max_height": null,
5849
            "max_width": null,
5850
            "min_height": null,
5851
            "min_width": null,
5852
            "object_fit": null,
5853
            "object_position": null,
5854
            "order": null,
5855
            "overflow": null,
5856
            "overflow_x": null,
5857
            "overflow_y": null,
5858
            "padding": null,
5859
            "right": null,
5860
            "top": null,
5861
            "visibility": null,
5862
            "width": null
5863
          }
5864
        },
5865
        "6e2bf10b566a453d8b342e824ec36fdf": {
5866
          "model_module": "@jupyter-widgets/controls",
5867
          "model_name": "ProgressStyleModel",
5868
          "model_module_version": "1.5.0",
5869
          "state": {
5870
            "_model_module": "@jupyter-widgets/controls",
5871
            "_model_module_version": "1.5.0",
5872
            "_model_name": "ProgressStyleModel",
5873
            "_view_count": null,
5874
            "_view_module": "@jupyter-widgets/base",
5875
            "_view_module_version": "1.2.0",
5876
            "_view_name": "StyleView",
5877
            "bar_color": null,
5878
            "description_width": ""
5879
          }
5880
        },
5881
        "1a7b5758d56d4811902698d1ad696163": {
5882
          "model_module": "@jupyter-widgets/base",
5883
          "model_name": "LayoutModel",
5884
          "model_module_version": "1.2.0",
5885
          "state": {
5886
            "_model_module": "@jupyter-widgets/base",
5887
            "_model_module_version": "1.2.0",
5888
            "_model_name": "LayoutModel",
5889
            "_view_count": null,
5890
            "_view_module": "@jupyter-widgets/base",
5891
            "_view_module_version": "1.2.0",
5892
            "_view_name": "LayoutView",
5893
            "align_content": null,
5894
            "align_items": null,
5895
            "align_self": null,
5896
            "border": null,
5897
            "bottom": null,
5898
            "display": null,
5899
            "flex": null,
5900
            "flex_flow": null,
5901
            "grid_area": null,
5902
            "grid_auto_columns": null,
5903
            "grid_auto_flow": null,
5904
            "grid_auto_rows": null,
5905
            "grid_column": null,
5906
            "grid_gap": null,
5907
            "grid_row": null,
5908
            "grid_template_areas": null,
5909
            "grid_template_columns": null,
5910
            "grid_template_rows": null,
5911
            "height": null,
5912
            "justify_content": null,
5913
            "justify_items": null,
5914
            "left": null,
5915
            "margin": null,
5916
            "max_height": null,
5917
            "max_width": null,
5918
            "min_height": null,
5919
            "min_width": null,
5920
            "object_fit": null,
5921
            "object_position": null,
5922
            "order": null,
5923
            "overflow": null,
5924
            "overflow_x": null,
5925
            "overflow_y": null,
5926
            "padding": null,
5927
            "right": null,
5928
            "top": null,
5929
            "visibility": null,
5930
            "width": null
5931
          }
5932
        },
5933
        "11047f16bb534e70ad1d6bdde663cfa4": {
5934
          "model_module": "@jupyter-widgets/controls",
5935
          "model_name": "DescriptionStyleModel",
5936
          "model_module_version": "1.5.0",
5937
          "state": {
5938
            "_model_module": "@jupyter-widgets/controls",
5939
            "_model_module_version": "1.5.0",
5940
            "_model_name": "DescriptionStyleModel",
5941
            "_view_count": null,
5942
            "_view_module": "@jupyter-widgets/base",
5943
            "_view_module_version": "1.2.0",
5944
            "_view_name": "StyleView",
5945
            "description_width": ""
5946
          }
5947
        },
5948
        "3791512b6e92422799da4e950c6dde6d": {
5949
          "model_module": "@jupyter-widgets/controls",
5950
          "model_name": "HBoxModel",
5951
          "model_module_version": "1.5.0",
5952
          "state": {
5953
            "_dom_classes": [],
5954
            "_model_module": "@jupyter-widgets/controls",
5955
            "_model_module_version": "1.5.0",
5956
            "_model_name": "HBoxModel",
5957
            "_view_count": null,
5958
            "_view_module": "@jupyter-widgets/controls",
5959
            "_view_module_version": "1.5.0",
5960
            "_view_name": "HBoxView",
5961
            "box_style": "",
5962
            "children": [
5963
              "IPY_MODEL_161970c8736a46899cd017c5e5906352",
5964
              "IPY_MODEL_cfafc68041754603b0916c8e28a90d0b",
5965
              "IPY_MODEL_a91cbbefe5ca4c07a46ad81424e2f0a7"
5966
            ],
5967
            "layout": "IPY_MODEL_995f169324d647fbb1c09c7ea3ec22d6"
5968
          }
5969
        },
5970
        "161970c8736a46899cd017c5e5906352": {
5971
          "model_module": "@jupyter-widgets/controls",
5972
          "model_name": "HTMLModel",
5973
          "model_module_version": "1.5.0",
5974
          "state": {
5975
            "_dom_classes": [],
5976
            "_model_module": "@jupyter-widgets/controls",
5977
            "_model_module_version": "1.5.0",
5978
            "_model_name": "HTMLModel",
5979
            "_view_count": null,
5980
            "_view_module": "@jupyter-widgets/controls",
5981
            "_view_module_version": "1.5.0",
5982
            "_view_name": "HTMLView",
5983
            "description": "",
5984
            "description_tooltip": null,
5985
            "layout": "IPY_MODEL_c617e79d6b4448f1af28c2d3c46d4ee5",
5986
            "placeholder": "​",
5987
            "style": "IPY_MODEL_1d23076205f3470590110feb368afd52",
5988
            "value": "sentencepiece.bpe.model: 100%"
5989
          }
5990
        },
5991
        "cfafc68041754603b0916c8e28a90d0b": {
5992
          "model_module": "@jupyter-widgets/controls",
5993
          "model_name": "FloatProgressModel",
5994
          "model_module_version": "1.5.0",
5995
          "state": {
5996
            "_dom_classes": [],
5997
            "_model_module": "@jupyter-widgets/controls",
5998
            "_model_module_version": "1.5.0",
5999
            "_model_name": "FloatProgressModel",
6000
            "_view_count": null,
6001
            "_view_module": "@jupyter-widgets/controls",
6002
            "_view_module_version": "1.5.0",
6003
            "_view_name": "ProgressView",
6004
            "bar_style": "success",
6005
            "description": "",
6006
            "description_tooltip": null,
6007
            "layout": "IPY_MODEL_99a5dcb8b54c4e1da8747d16e1d54538",
6008
            "max": 5069051,
6009
            "min": 0,
6010
            "orientation": "horizontal",
6011
            "style": "IPY_MODEL_a890ce1e992a4f04a524d3c76927d5f6",
6012
            "value": 5069051
6013
          }
6014
        },
6015
        "a91cbbefe5ca4c07a46ad81424e2f0a7": {
6016
          "model_module": "@jupyter-widgets/controls",
6017
          "model_name": "HTMLModel",
6018
          "model_module_version": "1.5.0",
6019
          "state": {
6020
            "_dom_classes": [],
6021
            "_model_module": "@jupyter-widgets/controls",
6022
            "_model_module_version": "1.5.0",
6023
            "_model_name": "HTMLModel",
6024
            "_view_count": null,
6025
            "_view_module": "@jupyter-widgets/controls",
6026
            "_view_module_version": "1.5.0",
6027
            "_view_name": "HTMLView",
6028
            "description": "",
6029
            "description_tooltip": null,
6030
            "layout": "IPY_MODEL_4e12dbb6c203422193cf12d58338f1c3",
6031
            "placeholder": "​",
6032
            "style": "IPY_MODEL_ee8308b8ca9a4dd8b9993824f2df176c",
6033
            "value": " 5.07M/5.07M [00:00&lt;00:00, 11.2MB/s]"
6034
          }
6035
        },
6036
        "995f169324d647fbb1c09c7ea3ec22d6": {
6037
          "model_module": "@jupyter-widgets/base",
6038
          "model_name": "LayoutModel",
6039
          "model_module_version": "1.2.0",
6040
          "state": {
6041
            "_model_module": "@jupyter-widgets/base",
6042
            "_model_module_version": "1.2.0",
6043
            "_model_name": "LayoutModel",
6044
            "_view_count": null,
6045
            "_view_module": "@jupyter-widgets/base",
6046
            "_view_module_version": "1.2.0",
6047
            "_view_name": "LayoutView",
6048
            "align_content": null,
6049
            "align_items": null,
6050
            "align_self": null,
6051
            "border": null,
6052
            "bottom": null,
6053
            "display": null,
6054
            "flex": null,
6055
            "flex_flow": null,
6056
            "grid_area": null,
6057
            "grid_auto_columns": null,
6058
            "grid_auto_flow": null,
6059
            "grid_auto_rows": null,
6060
            "grid_column": null,
6061
            "grid_gap": null,
6062
            "grid_row": null,
6063
            "grid_template_areas": null,
6064
            "grid_template_columns": null,
6065
            "grid_template_rows": null,
6066
            "height": null,
6067
            "justify_content": null,
6068
            "justify_items": null,
6069
            "left": null,
6070
            "margin": null,
6071
            "max_height": null,
6072
            "max_width": null,
6073
            "min_height": null,
6074
            "min_width": null,
6075
            "object_fit": null,
6076
            "object_position": null,
6077
            "order": null,
6078
            "overflow": null,
6079
            "overflow_x": null,
6080
            "overflow_y": null,
6081
            "padding": null,
6082
            "right": null,
6083
            "top": null,
6084
            "visibility": null,
6085
            "width": null
6086
          }
6087
        },
6088
        "c617e79d6b4448f1af28c2d3c46d4ee5": {
6089
          "model_module": "@jupyter-widgets/base",
6090
          "model_name": "LayoutModel",
6091
          "model_module_version": "1.2.0",
6092
          "state": {
6093
            "_model_module": "@jupyter-widgets/base",
6094
            "_model_module_version": "1.2.0",
6095
            "_model_name": "LayoutModel",
6096
            "_view_count": null,
6097
            "_view_module": "@jupyter-widgets/base",
6098
            "_view_module_version": "1.2.0",
6099
            "_view_name": "LayoutView",
6100
            "align_content": null,
6101
            "align_items": null,
6102
            "align_self": null,
6103
            "border": null,
6104
            "bottom": null,
6105
            "display": null,
6106
            "flex": null,
6107
            "flex_flow": null,
6108
            "grid_area": null,
6109
            "grid_auto_columns": null,
6110
            "grid_auto_flow": null,
6111
            "grid_auto_rows": null,
6112
            "grid_column": null,
6113
            "grid_gap": null,
6114
            "grid_row": null,
6115
            "grid_template_areas": null,
6116
            "grid_template_columns": null,
6117
            "grid_template_rows": null,
6118
            "height": null,
6119
            "justify_content": null,
6120
            "justify_items": null,
6121
            "left": null,
6122
            "margin": null,
6123
            "max_height": null,
6124
            "max_width": null,
6125
            "min_height": null,
6126
            "min_width": null,
6127
            "object_fit": null,
6128
            "object_position": null,
6129
            "order": null,
6130
            "overflow": null,
6131
            "overflow_x": null,
6132
            "overflow_y": null,
6133
            "padding": null,
6134
            "right": null,
6135
            "top": null,
6136
            "visibility": null,
6137
            "width": null
6138
          }
6139
        },
6140
        "1d23076205f3470590110feb368afd52": {
6141
          "model_module": "@jupyter-widgets/controls",
6142
          "model_name": "DescriptionStyleModel",
6143
          "model_module_version": "1.5.0",
6144
          "state": {
6145
            "_model_module": "@jupyter-widgets/controls",
6146
            "_model_module_version": "1.5.0",
6147
            "_model_name": "DescriptionStyleModel",
6148
            "_view_count": null,
6149
            "_view_module": "@jupyter-widgets/base",
6150
            "_view_module_version": "1.2.0",
6151
            "_view_name": "StyleView",
6152
            "description_width": ""
6153
          }
6154
        },
6155
        "99a5dcb8b54c4e1da8747d16e1d54538": {
6156
          "model_module": "@jupyter-widgets/base",
6157
          "model_name": "LayoutModel",
6158
          "model_module_version": "1.2.0",
6159
          "state": {
6160
            "_model_module": "@jupyter-widgets/base",
6161
            "_model_module_version": "1.2.0",
6162
            "_model_name": "LayoutModel",
6163
            "_view_count": null,
6164
            "_view_module": "@jupyter-widgets/base",
6165
            "_view_module_version": "1.2.0",
6166
            "_view_name": "LayoutView",
6167
            "align_content": null,
6168
            "align_items": null,
6169
            "align_self": null,
6170
            "border": null,
6171
            "bottom": null,
6172
            "display": null,
6173
            "flex": null,
6174
            "flex_flow": null,
6175
            "grid_area": null,
6176
            "grid_auto_columns": null,
6177
            "grid_auto_flow": null,
6178
            "grid_auto_rows": null,
6179
            "grid_column": null,
6180
            "grid_gap": null,
6181
            "grid_row": null,
6182
            "grid_template_areas": null,
6183
            "grid_template_columns": null,
6184
            "grid_template_rows": null,
6185
            "height": null,
6186
            "justify_content": null,
6187
            "justify_items": null,
6188
            "left": null,
6189
            "margin": null,
6190
            "max_height": null,
6191
            "max_width": null,
6192
            "min_height": null,
6193
            "min_width": null,
6194
            "object_fit": null,
6195
            "object_position": null,
6196
            "order": null,
6197
            "overflow": null,
6198
            "overflow_x": null,
6199
            "overflow_y": null,
6200
            "padding": null,
6201
            "right": null,
6202
            "top": null,
6203
            "visibility": null,
6204
            "width": null
6205
          }
6206
        },
6207
        "a890ce1e992a4f04a524d3c76927d5f6": {
6208
          "model_module": "@jupyter-widgets/controls",
6209
          "model_name": "ProgressStyleModel",
6210
          "model_module_version": "1.5.0",
6211
          "state": {
6212
            "_model_module": "@jupyter-widgets/controls",
6213
            "_model_module_version": "1.5.0",
6214
            "_model_name": "ProgressStyleModel",
6215
            "_view_count": null,
6216
            "_view_module": "@jupyter-widgets/base",
6217
            "_view_module_version": "1.2.0",
6218
            "_view_name": "StyleView",
6219
            "bar_color": null,
6220
            "description_width": ""
6221
          }
6222
        },
6223
        "4e12dbb6c203422193cf12d58338f1c3": {
6224
          "model_module": "@jupyter-widgets/base",
6225
          "model_name": "LayoutModel",
6226
          "model_module_version": "1.2.0",
6227
          "state": {
6228
            "_model_module": "@jupyter-widgets/base",
6229
            "_model_module_version": "1.2.0",
6230
            "_model_name": "LayoutModel",
6231
            "_view_count": null,
6232
            "_view_module": "@jupyter-widgets/base",
6233
            "_view_module_version": "1.2.0",
6234
            "_view_name": "LayoutView",
6235
            "align_content": null,
6236
            "align_items": null,
6237
            "align_self": null,
6238
            "border": null,
6239
            "bottom": null,
6240
            "display": null,
6241
            "flex": null,
6242
            "flex_flow": null,
6243
            "grid_area": null,
6244
            "grid_auto_columns": null,
6245
            "grid_auto_flow": null,
6246
            "grid_auto_rows": null,
6247
            "grid_column": null,
6248
            "grid_gap": null,
6249
            "grid_row": null,
6250
            "grid_template_areas": null,
6251
            "grid_template_columns": null,
6252
            "grid_template_rows": null,
6253
            "height": null,
6254
            "justify_content": null,
6255
            "justify_items": null,
6256
            "left": null,
6257
            "margin": null,
6258
            "max_height": null,
6259
            "max_width": null,
6260
            "min_height": null,
6261
            "min_width": null,
6262
            "object_fit": null,
6263
            "object_position": null,
6264
            "order": null,
6265
            "overflow": null,
6266
            "overflow_x": null,
6267
            "overflow_y": null,
6268
            "padding": null,
6269
            "right": null,
6270
            "top": null,
6271
            "visibility": null,
6272
            "width": null
6273
          }
6274
        },
6275
        "ee8308b8ca9a4dd8b9993824f2df176c": {
6276
          "model_module": "@jupyter-widgets/controls",
6277
          "model_name": "DescriptionStyleModel",
6278
          "model_module_version": "1.5.0",
6279
          "state": {
6280
            "_model_module": "@jupyter-widgets/controls",
6281
            "_model_module_version": "1.5.0",
6282
            "_model_name": "DescriptionStyleModel",
6283
            "_view_count": null,
6284
            "_view_module": "@jupyter-widgets/base",
6285
            "_view_module_version": "1.2.0",
6286
            "_view_name": "StyleView",
6287
            "description_width": ""
6288
          }
6289
        },
6290
        "3786b6a492114f58b8c92f904c80b9e2": {
6291
          "model_module": "@jupyter-widgets/controls",
6292
          "model_name": "HBoxModel",
6293
          "model_module_version": "1.5.0",
6294
          "state": {
6295
            "_dom_classes": [],
6296
            "_model_module": "@jupyter-widgets/controls",
6297
            "_model_module_version": "1.5.0",
6298
            "_model_name": "HBoxModel",
6299
            "_view_count": null,
6300
            "_view_module": "@jupyter-widgets/controls",
6301
            "_view_module_version": "1.5.0",
6302
            "_view_name": "HBoxView",
6303
            "box_style": "",
6304
            "children": [
6305
              "IPY_MODEL_d6d29174233e4d1596dd391b9a22eac2",
6306
              "IPY_MODEL_fc135f2fe68d4b83ad57ecb3085febe1",
6307
              "IPY_MODEL_f539dc11a57e425ea714a681d5027960"
6308
            ],
6309
            "layout": "IPY_MODEL_af7fa5b783c54fe4b62e53cd6641de2b"
6310
          }
6311
        },
6312
        "d6d29174233e4d1596dd391b9a22eac2": {
6313
          "model_module": "@jupyter-widgets/controls",
6314
          "model_name": "HTMLModel",
6315
          "model_module_version": "1.5.0",
6316
          "state": {
6317
            "_dom_classes": [],
6318
            "_model_module": "@jupyter-widgets/controls",
6319
            "_model_module_version": "1.5.0",
6320
            "_model_name": "HTMLModel",
6321
            "_view_count": null,
6322
            "_view_module": "@jupyter-widgets/controls",
6323
            "_view_module_version": "1.5.0",
6324
            "_view_name": "HTMLView",
6325
            "description": "",
6326
            "description_tooltip": null,
6327
            "layout": "IPY_MODEL_f0e324c229f7477baf1bb26fa4bc85ed",
6328
            "placeholder": "​",
6329
            "style": "IPY_MODEL_205312c7b62b4410a5aa1298deb1fb6f",
6330
            "value": "tokenizer.json: 100%"
6331
          }
6332
        },
6333
        "fc135f2fe68d4b83ad57ecb3085febe1": {
6334
          "model_module": "@jupyter-widgets/controls",
6335
          "model_name": "FloatProgressModel",
6336
          "model_module_version": "1.5.0",
6337
          "state": {
6338
            "_dom_classes": [],
6339
            "_model_module": "@jupyter-widgets/controls",
6340
            "_model_module_version": "1.5.0",
6341
            "_model_name": "FloatProgressModel",
6342
            "_view_count": null,
6343
            "_view_module": "@jupyter-widgets/controls",
6344
            "_view_module_version": "1.5.0",
6345
            "_view_name": "ProgressView",
6346
            "bar_style": "success",
6347
            "description": "",
6348
            "description_tooltip": null,
6349
            "layout": "IPY_MODEL_4847afa0d5044cb0a0d693495ff05c63",
6350
            "max": 9096718,
6351
            "min": 0,
6352
            "orientation": "horizontal",
6353
            "style": "IPY_MODEL_0a7d5f7c3174491990f4f1018cbafe4f",
6354
            "value": 9096718
6355
          }
6356
        },
6357
        "f539dc11a57e425ea714a681d5027960": {
6358
          "model_module": "@jupyter-widgets/controls",
6359
          "model_name": "HTMLModel",
6360
          "model_module_version": "1.5.0",
6361
          "state": {
6362
            "_dom_classes": [],
6363
            "_model_module": "@jupyter-widgets/controls",
6364
            "_model_module_version": "1.5.0",
6365
            "_model_name": "HTMLModel",
6366
            "_view_count": null,
6367
            "_view_module": "@jupyter-widgets/controls",
6368
            "_view_module_version": "1.5.0",
6369
            "_view_name": "HTMLView",
6370
            "description": "",
6371
            "description_tooltip": null,
6372
            "layout": "IPY_MODEL_b01bb888c2a646c0af368da52b9b4f2f",
6373
            "placeholder": "​",
6374
            "style": "IPY_MODEL_bf557e4621f04d56a736fda56470d77e",
6375
            "value": " 9.10M/9.10M [00:00&lt;00:00, 17.7MB/s]"
6376
          }
6377
        },
6378
        "af7fa5b783c54fe4b62e53cd6641de2b": {
6379
          "model_module": "@jupyter-widgets/base",
6380
          "model_name": "LayoutModel",
6381
          "model_module_version": "1.2.0",
6382
          "state": {
6383
            "_model_module": "@jupyter-widgets/base",
6384
            "_model_module_version": "1.2.0",
6385
            "_model_name": "LayoutModel",
6386
            "_view_count": null,
6387
            "_view_module": "@jupyter-widgets/base",
6388
            "_view_module_version": "1.2.0",
6389
            "_view_name": "LayoutView",
6390
            "align_content": null,
6391
            "align_items": null,
6392
            "align_self": null,
6393
            "border": null,
6394
            "bottom": null,
6395
            "display": null,
6396
            "flex": null,
6397
            "flex_flow": null,
6398
            "grid_area": null,
6399
            "grid_auto_columns": null,
6400
            "grid_auto_flow": null,
6401
            "grid_auto_rows": null,
6402
            "grid_column": null,
6403
            "grid_gap": null,
6404
            "grid_row": null,
6405
            "grid_template_areas": null,
6406
            "grid_template_columns": null,
6407
            "grid_template_rows": null,
6408
            "height": null,
6409
            "justify_content": null,
6410
            "justify_items": null,
6411
            "left": null,
6412
            "margin": null,
6413
            "max_height": null,
6414
            "max_width": null,
6415
            "min_height": null,
6416
            "min_width": null,
6417
            "object_fit": null,
6418
            "object_position": null,
6419
            "order": null,
6420
            "overflow": null,
6421
            "overflow_x": null,
6422
            "overflow_y": null,
6423
            "padding": null,
6424
            "right": null,
6425
            "top": null,
6426
            "visibility": null,
6427
            "width": null
6428
          }
6429
        },
6430
        "f0e324c229f7477baf1bb26fa4bc85ed": {
6431
          "model_module": "@jupyter-widgets/base",
6432
          "model_name": "LayoutModel",
6433
          "model_module_version": "1.2.0",
6434
          "state": {
6435
            "_model_module": "@jupyter-widgets/base",
6436
            "_model_module_version": "1.2.0",
6437
            "_model_name": "LayoutModel",
6438
            "_view_count": null,
6439
            "_view_module": "@jupyter-widgets/base",
6440
            "_view_module_version": "1.2.0",
6441
            "_view_name": "LayoutView",
6442
            "align_content": null,
6443
            "align_items": null,
6444
            "align_self": null,
6445
            "border": null,
6446
            "bottom": null,
6447
            "display": null,
6448
            "flex": null,
6449
            "flex_flow": null,
6450
            "grid_area": null,
6451
            "grid_auto_columns": null,
6452
            "grid_auto_flow": null,
6453
            "grid_auto_rows": null,
6454
            "grid_column": null,
6455
            "grid_gap": null,
6456
            "grid_row": null,
6457
            "grid_template_areas": null,
6458
            "grid_template_columns": null,
6459
            "grid_template_rows": null,
6460
            "height": null,
6461
            "justify_content": null,
6462
            "justify_items": null,
6463
            "left": null,
6464
            "margin": null,
6465
            "max_height": null,
6466
            "max_width": null,
6467
            "min_height": null,
6468
            "min_width": null,
6469
            "object_fit": null,
6470
            "object_position": null,
6471
            "order": null,
6472
            "overflow": null,
6473
            "overflow_x": null,
6474
            "overflow_y": null,
6475
            "padding": null,
6476
            "right": null,
6477
            "top": null,
6478
            "visibility": null,
6479
            "width": null
6480
          }
6481
        },
6482
        "205312c7b62b4410a5aa1298deb1fb6f": {
6483
          "model_module": "@jupyter-widgets/controls",
6484
          "model_name": "DescriptionStyleModel",
6485
          "model_module_version": "1.5.0",
6486
          "state": {
6487
            "_model_module": "@jupyter-widgets/controls",
6488
            "_model_module_version": "1.5.0",
6489
            "_model_name": "DescriptionStyleModel",
6490
            "_view_count": null,
6491
            "_view_module": "@jupyter-widgets/base",
6492
            "_view_module_version": "1.2.0",
6493
            "_view_name": "StyleView",
6494
            "description_width": ""
6495
          }
6496
        },
6497
        "4847afa0d5044cb0a0d693495ff05c63": {
6498
          "model_module": "@jupyter-widgets/base",
6499
          "model_name": "LayoutModel",
6500
          "model_module_version": "1.2.0",
6501
          "state": {
6502
            "_model_module": "@jupyter-widgets/base",
6503
            "_model_module_version": "1.2.0",
6504
            "_model_name": "LayoutModel",
6505
            "_view_count": null,
6506
            "_view_module": "@jupyter-widgets/base",
6507
            "_view_module_version": "1.2.0",
6508
            "_view_name": "LayoutView",
6509
            "align_content": null,
6510
            "align_items": null,
6511
            "align_self": null,
6512
            "border": null,
6513
            "bottom": null,
6514
            "display": null,
6515
            "flex": null,
6516
            "flex_flow": null,
6517
            "grid_area": null,
6518
            "grid_auto_columns": null,
6519
            "grid_auto_flow": null,
6520
            "grid_auto_rows": null,
6521
            "grid_column": null,
6522
            "grid_gap": null,
6523
            "grid_row": null,
6524
            "grid_template_areas": null,
6525
            "grid_template_columns": null,
6526
            "grid_template_rows": null,
6527
            "height": null,
6528
            "justify_content": null,
6529
            "justify_items": null,
6530
            "left": null,
6531
            "margin": null,
6532
            "max_height": null,
6533
            "max_width": null,
6534
            "min_height": null,
6535
            "min_width": null,
6536
            "object_fit": null,
6537
            "object_position": null,
6538
            "order": null,
6539
            "overflow": null,
6540
            "overflow_x": null,
6541
            "overflow_y": null,
6542
            "padding": null,
6543
            "right": null,
6544
            "top": null,
6545
            "visibility": null,
6546
            "width": null
6547
          }
6548
        },
6549
        "0a7d5f7c3174491990f4f1018cbafe4f": {
6550
          "model_module": "@jupyter-widgets/controls",
6551
          "model_name": "ProgressStyleModel",
6552
          "model_module_version": "1.5.0",
6553
          "state": {
6554
            "_model_module": "@jupyter-widgets/controls",
6555
            "_model_module_version": "1.5.0",
6556
            "_model_name": "ProgressStyleModel",
6557
            "_view_count": null,
6558
            "_view_module": "@jupyter-widgets/base",
6559
            "_view_module_version": "1.2.0",
6560
            "_view_name": "StyleView",
6561
            "bar_color": null,
6562
            "description_width": ""
6563
          }
6564
        },
6565
        "b01bb888c2a646c0af368da52b9b4f2f": {
6566
          "model_module": "@jupyter-widgets/base",
6567
          "model_name": "LayoutModel",
6568
          "model_module_version": "1.2.0",
6569
          "state": {
6570
            "_model_module": "@jupyter-widgets/base",
6571
            "_model_module_version": "1.2.0",
6572
            "_model_name": "LayoutModel",
6573
            "_view_count": null,
6574
            "_view_module": "@jupyter-widgets/base",
6575
            "_view_module_version": "1.2.0",
6576
            "_view_name": "LayoutView",
6577
            "align_content": null,
6578
            "align_items": null,
6579
            "align_self": null,
6580
            "border": null,
6581
            "bottom": null,
6582
            "display": null,
6583
            "flex": null,
6584
            "flex_flow": null,
6585
            "grid_area": null,
6586
            "grid_auto_columns": null,
6587
            "grid_auto_flow": null,
6588
            "grid_auto_rows": null,
6589
            "grid_column": null,
6590
            "grid_gap": null,
6591
            "grid_row": null,
6592
            "grid_template_areas": null,
6593
            "grid_template_columns": null,
6594
            "grid_template_rows": null,
6595
            "height": null,
6596
            "justify_content": null,
6597
            "justify_items": null,
6598
            "left": null,
6599
            "margin": null,
6600
            "max_height": null,
6601
            "max_width": null,
6602
            "min_height": null,
6603
            "min_width": null,
6604
            "object_fit": null,
6605
            "object_position": null,
6606
            "order": null,
6607
            "overflow": null,
6608
            "overflow_x": null,
6609
            "overflow_y": null,
6610
            "padding": null,
6611
            "right": null,
6612
            "top": null,
6613
            "visibility": null,
6614
            "width": null
6615
          }
6616
        },
6617
        "bf557e4621f04d56a736fda56470d77e": {
6618
          "model_module": "@jupyter-widgets/controls",
6619
          "model_name": "DescriptionStyleModel",
6620
          "model_module_version": "1.5.0",
6621
          "state": {
6622
            "_model_module": "@jupyter-widgets/controls",
6623
            "_model_module_version": "1.5.0",
6624
            "_model_name": "DescriptionStyleModel",
6625
            "_view_count": null,
6626
            "_view_module": "@jupyter-widgets/base",
6627
            "_view_module_version": "1.2.0",
6628
            "_view_name": "StyleView",
6629
            "description_width": ""
6630
          }
6631
        },
6632
        "cabdca2ad56e43eaa506e4588087c3f2": {
6633
          "model_module": "@jupyter-widgets/controls",
6634
          "model_name": "HBoxModel",
6635
          "model_module_version": "1.5.0",
6636
          "state": {
6637
            "_dom_classes": [],
6638
            "_model_module": "@jupyter-widgets/controls",
6639
            "_model_module_version": "1.5.0",
6640
            "_model_name": "HBoxModel",
6641
            "_view_count": null,
6642
            "_view_module": "@jupyter-widgets/controls",
6643
            "_view_module_version": "1.5.0",
6644
            "_view_name": "HBoxView",
6645
            "box_style": "",
6646
            "children": [
6647
              "IPY_MODEL_116d6d8de1af401a971f3da77cb4b916",
6648
              "IPY_MODEL_5644e8831133414dbb54e899f1253e8b",
6649
              "IPY_MODEL_75cff91a18654513bc53045df04832f6"
6650
            ],
6651
            "layout": "IPY_MODEL_883ee6d5131443eba2c4598c9fa0223e"
6652
          }
6653
        },
6654
        "116d6d8de1af401a971f3da77cb4b916": {
6655
          "model_module": "@jupyter-widgets/controls",
6656
          "model_name": "HTMLModel",
6657
          "model_module_version": "1.5.0",
6658
          "state": {
6659
            "_dom_classes": [],
6660
            "_model_module": "@jupyter-widgets/controls",
6661
            "_model_module_version": "1.5.0",
6662
            "_model_name": "HTMLModel",
6663
            "_view_count": null,
6664
            "_view_module": "@jupyter-widgets/controls",
6665
            "_view_module_version": "1.5.0",
6666
            "_view_name": "HTMLView",
6667
            "description": "",
6668
            "description_tooltip": null,
6669
            "layout": "IPY_MODEL_5454af7e0b964f388ecab1f83f74ecc9",
6670
            "placeholder": "​",
6671
            "style": "IPY_MODEL_9f62543c61b04183b7784826e9178321",
6672
            "value": "Map: 100%"
6673
          }
6674
        },
6675
        "5644e8831133414dbb54e899f1253e8b": {
6676
          "model_module": "@jupyter-widgets/controls",
6677
          "model_name": "FloatProgressModel",
6678
          "model_module_version": "1.5.0",
6679
          "state": {
6680
            "_dom_classes": [],
6681
            "_model_module": "@jupyter-widgets/controls",
6682
            "_model_module_version": "1.5.0",
6683
            "_model_name": "FloatProgressModel",
6684
            "_view_count": null,
6685
            "_view_module": "@jupyter-widgets/controls",
6686
            "_view_module_version": "1.5.0",
6687
            "_view_name": "ProgressView",
6688
            "bar_style": "success",
6689
            "description": "",
6690
            "description_tooltip": null,
6691
            "layout": "IPY_MODEL_dd31f0ed6eee4b6a97d4c9ee6addabe3",
6692
            "max": 1307,
6693
            "min": 0,
6694
            "orientation": "horizontal",
6695
            "style": "IPY_MODEL_9f072c7c1f7646ef876b49917e1eccff",
6696
            "value": 1307
6697
          }
6698
        },
6699
        "75cff91a18654513bc53045df04832f6": {
6700
          "model_module": "@jupyter-widgets/controls",
6701
          "model_name": "HTMLModel",
6702
          "model_module_version": "1.5.0",
6703
          "state": {
6704
            "_dom_classes": [],
6705
            "_model_module": "@jupyter-widgets/controls",
6706
            "_model_module_version": "1.5.0",
6707
            "_model_name": "HTMLModel",
6708
            "_view_count": null,
6709
            "_view_module": "@jupyter-widgets/controls",
6710
            "_view_module_version": "1.5.0",
6711
            "_view_name": "HTMLView",
6712
            "description": "",
6713
            "description_tooltip": null,
6714
            "layout": "IPY_MODEL_76f09404ea3d447782bb09cd27e9a723",
6715
            "placeholder": "​",
6716
            "style": "IPY_MODEL_f2356694803b4c678de2ca0a11cb11e2",
6717
            "value": " 1307/1307 [00:00&lt;00:00, 2282.77 examples/s]"
6718
          }
6719
        },
6720
        "883ee6d5131443eba2c4598c9fa0223e": {
6721
          "model_module": "@jupyter-widgets/base",
6722
          "model_name": "LayoutModel",
6723
          "model_module_version": "1.2.0",
6724
          "state": {
6725
            "_model_module": "@jupyter-widgets/base",
6726
            "_model_module_version": "1.2.0",
6727
            "_model_name": "LayoutModel",
6728
            "_view_count": null,
6729
            "_view_module": "@jupyter-widgets/base",
6730
            "_view_module_version": "1.2.0",
6731
            "_view_name": "LayoutView",
6732
            "align_content": null,
6733
            "align_items": null,
6734
            "align_self": null,
6735
            "border": null,
6736
            "bottom": null,
6737
            "display": null,
6738
            "flex": null,
6739
            "flex_flow": null,
6740
            "grid_area": null,
6741
            "grid_auto_columns": null,
6742
            "grid_auto_flow": null,
6743
            "grid_auto_rows": null,
6744
            "grid_column": null,
6745
            "grid_gap": null,
6746
            "grid_row": null,
6747
            "grid_template_areas": null,
6748
            "grid_template_columns": null,
6749
            "grid_template_rows": null,
6750
            "height": null,
6751
            "justify_content": null,
6752
            "justify_items": null,
6753
            "left": null,
6754
            "margin": null,
6755
            "max_height": null,
6756
            "max_width": null,
6757
            "min_height": null,
6758
            "min_width": null,
6759
            "object_fit": null,
6760
            "object_position": null,
6761
            "order": null,
6762
            "overflow": null,
6763
            "overflow_x": null,
6764
            "overflow_y": null,
6765
            "padding": null,
6766
            "right": null,
6767
            "top": null,
6768
            "visibility": null,
6769
            "width": null
6770
          }
6771
        },
6772
        "5454af7e0b964f388ecab1f83f74ecc9": {
6773
          "model_module": "@jupyter-widgets/base",
6774
          "model_name": "LayoutModel",
6775
          "model_module_version": "1.2.0",
6776
          "state": {
6777
            "_model_module": "@jupyter-widgets/base",
6778
            "_model_module_version": "1.2.0",
6779
            "_model_name": "LayoutModel",
6780
            "_view_count": null,
6781
            "_view_module": "@jupyter-widgets/base",
6782
            "_view_module_version": "1.2.0",
6783
            "_view_name": "LayoutView",
6784
            "align_content": null,
6785
            "align_items": null,
6786
            "align_self": null,
6787
            "border": null,
6788
            "bottom": null,
6789
            "display": null,
6790
            "flex": null,
6791
            "flex_flow": null,
6792
            "grid_area": null,
6793
            "grid_auto_columns": null,
6794
            "grid_auto_flow": null,
6795
            "grid_auto_rows": null,
6796
            "grid_column": null,
6797
            "grid_gap": null,
6798
            "grid_row": null,
6799
            "grid_template_areas": null,
6800
            "grid_template_columns": null,
6801
            "grid_template_rows": null,
6802
            "height": null,
6803
            "justify_content": null,
6804
            "justify_items": null,
6805
            "left": null,
6806
            "margin": null,
6807
            "max_height": null,
6808
            "max_width": null,
6809
            "min_height": null,
6810
            "min_width": null,
6811
            "object_fit": null,
6812
            "object_position": null,
6813
            "order": null,
6814
            "overflow": null,
6815
            "overflow_x": null,
6816
            "overflow_y": null,
6817
            "padding": null,
6818
            "right": null,
6819
            "top": null,
6820
            "visibility": null,
6821
            "width": null
6822
          }
6823
        },
6824
        "9f62543c61b04183b7784826e9178321": {
6825
          "model_module": "@jupyter-widgets/controls",
6826
          "model_name": "DescriptionStyleModel",
6827
          "model_module_version": "1.5.0",
6828
          "state": {
6829
            "_model_module": "@jupyter-widgets/controls",
6830
            "_model_module_version": "1.5.0",
6831
            "_model_name": "DescriptionStyleModel",
6832
            "_view_count": null,
6833
            "_view_module": "@jupyter-widgets/base",
6834
            "_view_module_version": "1.2.0",
6835
            "_view_name": "StyleView",
6836
            "description_width": ""
6837
          }
6838
        },
6839
        "dd31f0ed6eee4b6a97d4c9ee6addabe3": {
6840
          "model_module": "@jupyter-widgets/base",
6841
          "model_name": "LayoutModel",
6842
          "model_module_version": "1.2.0",
6843
          "state": {
6844
            "_model_module": "@jupyter-widgets/base",
6845
            "_model_module_version": "1.2.0",
6846
            "_model_name": "LayoutModel",
6847
            "_view_count": null,
6848
            "_view_module": "@jupyter-widgets/base",
6849
            "_view_module_version": "1.2.0",
6850
            "_view_name": "LayoutView",
6851
            "align_content": null,
6852
            "align_items": null,
6853
            "align_self": null,
6854
            "border": null,
6855
            "bottom": null,
6856
            "display": null,
6857
            "flex": null,
6858
            "flex_flow": null,
6859
            "grid_area": null,
6860
            "grid_auto_columns": null,
6861
            "grid_auto_flow": null,
6862
            "grid_auto_rows": null,
6863
            "grid_column": null,
6864
            "grid_gap": null,
6865
            "grid_row": null,
6866
            "grid_template_areas": null,
6867
            "grid_template_columns": null,
6868
            "grid_template_rows": null,
6869
            "height": null,
6870
            "justify_content": null,
6871
            "justify_items": null,
6872
            "left": null,
6873
            "margin": null,
6874
            "max_height": null,
6875
            "max_width": null,
6876
            "min_height": null,
6877
            "min_width": null,
6878
            "object_fit": null,
6879
            "object_position": null,
6880
            "order": null,
6881
            "overflow": null,
6882
            "overflow_x": null,
6883
            "overflow_y": null,
6884
            "padding": null,
6885
            "right": null,
6886
            "top": null,
6887
            "visibility": null,
6888
            "width": null
6889
          }
6890
        },
6891
        "9f072c7c1f7646ef876b49917e1eccff": {
6892
          "model_module": "@jupyter-widgets/controls",
6893
          "model_name": "ProgressStyleModel",
6894
          "model_module_version": "1.5.0",
6895
          "state": {
6896
            "_model_module": "@jupyter-widgets/controls",
6897
            "_model_module_version": "1.5.0",
6898
            "_model_name": "ProgressStyleModel",
6899
            "_view_count": null,
6900
            "_view_module": "@jupyter-widgets/base",
6901
            "_view_module_version": "1.2.0",
6902
            "_view_name": "StyleView",
6903
            "bar_color": null,
6904
            "description_width": ""
6905
          }
6906
        },
6907
        "76f09404ea3d447782bb09cd27e9a723": {
6908
          "model_module": "@jupyter-widgets/base",
6909
          "model_name": "LayoutModel",
6910
          "model_module_version": "1.2.0",
6911
          "state": {
6912
            "_model_module": "@jupyter-widgets/base",
6913
            "_model_module_version": "1.2.0",
6914
            "_model_name": "LayoutModel",
6915
            "_view_count": null,
6916
            "_view_module": "@jupyter-widgets/base",
6917
            "_view_module_version": "1.2.0",
6918
            "_view_name": "LayoutView",
6919
            "align_content": null,
6920
            "align_items": null,
6921
            "align_self": null,
6922
            "border": null,
6923
            "bottom": null,
6924
            "display": null,
6925
            "flex": null,
6926
            "flex_flow": null,
6927
            "grid_area": null,
6928
            "grid_auto_columns": null,
6929
            "grid_auto_flow": null,
6930
            "grid_auto_rows": null,
6931
            "grid_column": null,
6932
            "grid_gap": null,
6933
            "grid_row": null,
6934
            "grid_template_areas": null,
6935
            "grid_template_columns": null,
6936
            "grid_template_rows": null,
6937
            "height": null,
6938
            "justify_content": null,
6939
            "justify_items": null,
6940
            "left": null,
6941
            "margin": null,
6942
            "max_height": null,
6943
            "max_width": null,
6944
            "min_height": null,
6945
            "min_width": null,
6946
            "object_fit": null,
6947
            "object_position": null,
6948
            "order": null,
6949
            "overflow": null,
6950
            "overflow_x": null,
6951
            "overflow_y": null,
6952
            "padding": null,
6953
            "right": null,
6954
            "top": null,
6955
            "visibility": null,
6956
            "width": null
6957
          }
6958
        },
6959
        "f2356694803b4c678de2ca0a11cb11e2": {
6960
          "model_module": "@jupyter-widgets/controls",
6961
          "model_name": "DescriptionStyleModel",
6962
          "model_module_version": "1.5.0",
6963
          "state": {
6964
            "_model_module": "@jupyter-widgets/controls",
6965
            "_model_module_version": "1.5.0",
6966
            "_model_name": "DescriptionStyleModel",
6967
            "_view_count": null,
6968
            "_view_module": "@jupyter-widgets/base",
6969
            "_view_module_version": "1.2.0",
6970
            "_view_name": "StyleView",
6971
            "description_width": ""
6972
          }
6973
        },
6974
        "c070b80f8b404d7aa1945ce74f04e669": {
6975
          "model_module": "@jupyter-widgets/controls",
6976
          "model_name": "HBoxModel",
6977
          "model_module_version": "1.5.0",
6978
          "state": {
6979
            "_dom_classes": [],
6980
            "_model_module": "@jupyter-widgets/controls",
6981
            "_model_module_version": "1.5.0",
6982
            "_model_name": "HBoxModel",
6983
            "_view_count": null,
6984
            "_view_module": "@jupyter-widgets/controls",
6985
            "_view_module_version": "1.5.0",
6986
            "_view_name": "HBoxView",
6987
            "box_style": "",
6988
            "children": [
6989
              "IPY_MODEL_efd95411b98444c39eacc1b3ba12cf9e",
6990
              "IPY_MODEL_f3b4b928a53444cf9133d950c79be90a",
6991
              "IPY_MODEL_8ce9a6d482554cff8218b795a2b7afe5"
6992
            ],
6993
            "layout": "IPY_MODEL_4410cb65ce684026bac9d6bce8204641"
6994
          }
6995
        },
6996
        "efd95411b98444c39eacc1b3ba12cf9e": {
6997
          "model_module": "@jupyter-widgets/controls",
6998
          "model_name": "HTMLModel",
6999
          "model_module_version": "1.5.0",
7000
          "state": {
7001
            "_dom_classes": [],
7002
            "_model_module": "@jupyter-widgets/controls",
7003
            "_model_module_version": "1.5.0",
7004
            "_model_name": "HTMLModel",
7005
            "_view_count": null,
7006
            "_view_module": "@jupyter-widgets/controls",
7007
            "_view_module_version": "1.5.0",
7008
            "_view_name": "HTMLView",
7009
            "description": "",
7010
            "description_tooltip": null,
7011
            "layout": "IPY_MODEL_bbff96b8aab24b1dacce61988d890bde",
7012
            "placeholder": "​",
7013
            "style": "IPY_MODEL_9b46df12fda94d0193f51b1517316468",
7014
            "value": "Map: 100%"
7015
          }
7016
        },
7017
        "f3b4b928a53444cf9133d950c79be90a": {
7018
          "model_module": "@jupyter-widgets/controls",
7019
          "model_name": "FloatProgressModel",
7020
          "model_module_version": "1.5.0",
7021
          "state": {
7022
            "_dom_classes": [],
7023
            "_model_module": "@jupyter-widgets/controls",
7024
            "_model_module_version": "1.5.0",
7025
            "_model_name": "FloatProgressModel",
7026
            "_view_count": null,
7027
            "_view_module": "@jupyter-widgets/controls",
7028
            "_view_module_version": "1.5.0",
7029
            "_view_name": "ProgressView",
7030
            "bar_style": "success",
7031
            "description": "",
7032
            "description_tooltip": null,
7033
            "layout": "IPY_MODEL_2a1d2c6b26384a14903f1a186de1f32f",
7034
            "max": 50,
7035
            "min": 0,
7036
            "orientation": "horizontal",
7037
            "style": "IPY_MODEL_0458e8b753dd4412bda534234c52e48f",
7038
            "value": 50
7039
          }
7040
        },
7041
        "8ce9a6d482554cff8218b795a2b7afe5": {
7042
          "model_module": "@jupyter-widgets/controls",
7043
          "model_name": "HTMLModel",
7044
          "model_module_version": "1.5.0",
7045
          "state": {
7046
            "_dom_classes": [],
7047
            "_model_module": "@jupyter-widgets/controls",
7048
            "_model_module_version": "1.5.0",
7049
            "_model_name": "HTMLModel",
7050
            "_view_count": null,
7051
            "_view_module": "@jupyter-widgets/controls",
7052
            "_view_module_version": "1.5.0",
7053
            "_view_name": "HTMLView",
7054
            "description": "",
7055
            "description_tooltip": null,
7056
            "layout": "IPY_MODEL_0ed9463a4fc74d1daf25e7273d2025da",
7057
            "placeholder": "​",
7058
            "style": "IPY_MODEL_6f26495add4a4869abf3b830919feeb0",
7059
            "value": " 50/50 [00:00&lt;00:00, 944.02 examples/s]"
7060
          }
7061
        },
7062
        "4410cb65ce684026bac9d6bce8204641": {
7063
          "model_module": "@jupyter-widgets/base",
7064
          "model_name": "LayoutModel",
7065
          "model_module_version": "1.2.0",
7066
          "state": {
7067
            "_model_module": "@jupyter-widgets/base",
7068
            "_model_module_version": "1.2.0",
7069
            "_model_name": "LayoutModel",
7070
            "_view_count": null,
7071
            "_view_module": "@jupyter-widgets/base",
7072
            "_view_module_version": "1.2.0",
7073
            "_view_name": "LayoutView",
7074
            "align_content": null,
7075
            "align_items": null,
7076
            "align_self": null,
7077
            "border": null,
7078
            "bottom": null,
7079
            "display": null,
7080
            "flex": null,
7081
            "flex_flow": null,
7082
            "grid_area": null,
7083
            "grid_auto_columns": null,
7084
            "grid_auto_flow": null,
7085
            "grid_auto_rows": null,
7086
            "grid_column": null,
7087
            "grid_gap": null,
7088
            "grid_row": null,
7089
            "grid_template_areas": null,
7090
            "grid_template_columns": null,
7091
            "grid_template_rows": null,
7092
            "height": null,
7093
            "justify_content": null,
7094
            "justify_items": null,
7095
            "left": null,
7096
            "margin": null,
7097
            "max_height": null,
7098
            "max_width": null,
7099
            "min_height": null,
7100
            "min_width": null,
7101
            "object_fit": null,
7102
            "object_position": null,
7103
            "order": null,
7104
            "overflow": null,
7105
            "overflow_x": null,
7106
            "overflow_y": null,
7107
            "padding": null,
7108
            "right": null,
7109
            "top": null,
7110
            "visibility": null,
7111
            "width": null
7112
          }
7113
        },
7114
        "bbff96b8aab24b1dacce61988d890bde": {
7115
          "model_module": "@jupyter-widgets/base",
7116
          "model_name": "LayoutModel",
7117
          "model_module_version": "1.2.0",
7118
          "state": {
7119
            "_model_module": "@jupyter-widgets/base",
7120
            "_model_module_version": "1.2.0",
7121
            "_model_name": "LayoutModel",
7122
            "_view_count": null,
7123
            "_view_module": "@jupyter-widgets/base",
7124
            "_view_module_version": "1.2.0",
7125
            "_view_name": "LayoutView",
7126
            "align_content": null,
7127
            "align_items": null,
7128
            "align_self": null,
7129
            "border": null,
7130
            "bottom": null,
7131
            "display": null,
7132
            "flex": null,
7133
            "flex_flow": null,
7134
            "grid_area": null,
7135
            "grid_auto_columns": null,
7136
            "grid_auto_flow": null,
7137
            "grid_auto_rows": null,
7138
            "grid_column": null,
7139
            "grid_gap": null,
7140
            "grid_row": null,
7141
            "grid_template_areas": null,
7142
            "grid_template_columns": null,
7143
            "grid_template_rows": null,
7144
            "height": null,
7145
            "justify_content": null,
7146
            "justify_items": null,
7147
            "left": null,
7148
            "margin": null,
7149
            "max_height": null,
7150
            "max_width": null,
7151
            "min_height": null,
7152
            "min_width": null,
7153
            "object_fit": null,
7154
            "object_position": null,
7155
            "order": null,
7156
            "overflow": null,
7157
            "overflow_x": null,
7158
            "overflow_y": null,
7159
            "padding": null,
7160
            "right": null,
7161
            "top": null,
7162
            "visibility": null,
7163
            "width": null
7164
          }
7165
        },
7166
        "9b46df12fda94d0193f51b1517316468": {
7167
          "model_module": "@jupyter-widgets/controls",
7168
          "model_name": "DescriptionStyleModel",
7169
          "model_module_version": "1.5.0",
7170
          "state": {
7171
            "_model_module": "@jupyter-widgets/controls",
7172
            "_model_module_version": "1.5.0",
7173
            "_model_name": "DescriptionStyleModel",
7174
            "_view_count": null,
7175
            "_view_module": "@jupyter-widgets/base",
7176
            "_view_module_version": "1.2.0",
7177
            "_view_name": "StyleView",
7178
            "description_width": ""
7179
          }
7180
        },
7181
        "2a1d2c6b26384a14903f1a186de1f32f": {
7182
          "model_module": "@jupyter-widgets/base",
7183
          "model_name": "LayoutModel",
7184
          "model_module_version": "1.2.0",
7185
          "state": {
7186
            "_model_module": "@jupyter-widgets/base",
7187
            "_model_module_version": "1.2.0",
7188
            "_model_name": "LayoutModel",
7189
            "_view_count": null,
7190
            "_view_module": "@jupyter-widgets/base",
7191
            "_view_module_version": "1.2.0",
7192
            "_view_name": "LayoutView",
7193
            "align_content": null,
7194
            "align_items": null,
7195
            "align_self": null,
7196
            "border": null,
7197
            "bottom": null,
7198
            "display": null,
7199
            "flex": null,
7200
            "flex_flow": null,
7201
            "grid_area": null,
7202
            "grid_auto_columns": null,
7203
            "grid_auto_flow": null,
7204
            "grid_auto_rows": null,
7205
            "grid_column": null,
7206
            "grid_gap": null,
7207
            "grid_row": null,
7208
            "grid_template_areas": null,
7209
            "grid_template_columns": null,
7210
            "grid_template_rows": null,
7211
            "height": null,
7212
            "justify_content": null,
7213
            "justify_items": null,
7214
            "left": null,
7215
            "margin": null,
7216
            "max_height": null,
7217
            "max_width": null,
7218
            "min_height": null,
7219
            "min_width": null,
7220
            "object_fit": null,
7221
            "object_position": null,
7222
            "order": null,
7223
            "overflow": null,
7224
            "overflow_x": null,
7225
            "overflow_y": null,
7226
            "padding": null,
7227
            "right": null,
7228
            "top": null,
7229
            "visibility": null,
7230
            "width": null
7231
          }
7232
        },
7233
        "0458e8b753dd4412bda534234c52e48f": {
7234
          "model_module": "@jupyter-widgets/controls",
7235
          "model_name": "ProgressStyleModel",
7236
          "model_module_version": "1.5.0",
7237
          "state": {
7238
            "_model_module": "@jupyter-widgets/controls",
7239
            "_model_module_version": "1.5.0",
7240
            "_model_name": "ProgressStyleModel",
7241
            "_view_count": null,
7242
            "_view_module": "@jupyter-widgets/base",
7243
            "_view_module_version": "1.2.0",
7244
            "_view_name": "StyleView",
7245
            "bar_color": null,
7246
            "description_width": ""
7247
          }
7248
        },
7249
        "0ed9463a4fc74d1daf25e7273d2025da": {
7250
          "model_module": "@jupyter-widgets/base",
7251
          "model_name": "LayoutModel",
7252
          "model_module_version": "1.2.0",
7253
          "state": {
7254
            "_model_module": "@jupyter-widgets/base",
7255
            "_model_module_version": "1.2.0",
7256
            "_model_name": "LayoutModel",
7257
            "_view_count": null,
7258
            "_view_module": "@jupyter-widgets/base",
7259
            "_view_module_version": "1.2.0",
7260
            "_view_name": "LayoutView",
7261
            "align_content": null,
7262
            "align_items": null,
7263
            "align_self": null,
7264
            "border": null,
7265
            "bottom": null,
7266
            "display": null,
7267
            "flex": null,
7268
            "flex_flow": null,
7269
            "grid_area": null,
7270
            "grid_auto_columns": null,
7271
            "grid_auto_flow": null,
7272
            "grid_auto_rows": null,
7273
            "grid_column": null,
7274
            "grid_gap": null,
7275
            "grid_row": null,
7276
            "grid_template_areas": null,
7277
            "grid_template_columns": null,
7278
            "grid_template_rows": null,
7279
            "height": null,
7280
            "justify_content": null,
7281
            "justify_items": null,
7282
            "left": null,
7283
            "margin": null,
7284
            "max_height": null,
7285
            "max_width": null,
7286
            "min_height": null,
7287
            "min_width": null,
7288
            "object_fit": null,
7289
            "object_position": null,
7290
            "order": null,
7291
            "overflow": null,
7292
            "overflow_x": null,
7293
            "overflow_y": null,
7294
            "padding": null,
7295
            "right": null,
7296
            "top": null,
7297
            "visibility": null,
7298
            "width": null
7299
          }
7300
        },
7301
        "6f26495add4a4869abf3b830919feeb0": {
7302
          "model_module": "@jupyter-widgets/controls",
7303
          "model_name": "DescriptionStyleModel",
7304
          "model_module_version": "1.5.0",
7305
          "state": {
7306
            "_model_module": "@jupyter-widgets/controls",
7307
            "_model_module_version": "1.5.0",
7308
            "_model_name": "DescriptionStyleModel",
7309
            "_view_count": null,
7310
            "_view_module": "@jupyter-widgets/base",
7311
            "_view_module_version": "1.2.0",
7312
            "_view_name": "StyleView",
7313
            "description_width": ""
7314
          }
7315
        },
7316
        "a30bdf5afc2b4e078d8200d34b4760d2": {
7317
          "model_module": "@jupyter-widgets/controls",
7318
          "model_name": "HBoxModel",
7319
          "model_module_version": "1.5.0",
7320
          "state": {
7321
            "_dom_classes": [],
7322
            "_model_module": "@jupyter-widgets/controls",
7323
            "_model_module_version": "1.5.0",
7324
            "_model_name": "HBoxModel",
7325
            "_view_count": null,
7326
            "_view_module": "@jupyter-widgets/controls",
7327
            "_view_module_version": "1.5.0",
7328
            "_view_name": "HBoxView",
7329
            "box_style": "",
7330
            "children": [
7331
              "IPY_MODEL_7c864990c29f4f598cd3fdd02550d7b5",
7332
              "IPY_MODEL_63640367c9494489a91988bb7c22b2b7",
7333
              "IPY_MODEL_177e85135bd644c5b6f283237dd09361"
7334
            ],
7335
            "layout": "IPY_MODEL_9b5c5002cf264eb899c48390494d167a"
7336
          }
7337
        },
7338
        "7c864990c29f4f598cd3fdd02550d7b5": {
7339
          "model_module": "@jupyter-widgets/controls",
7340
          "model_name": "HTMLModel",
7341
          "model_module_version": "1.5.0",
7342
          "state": {
7343
            "_dom_classes": [],
7344
            "_model_module": "@jupyter-widgets/controls",
7345
            "_model_module_version": "1.5.0",
7346
            "_model_name": "HTMLModel",
7347
            "_view_count": null,
7348
            "_view_module": "@jupyter-widgets/controls",
7349
            "_view_module_version": "1.5.0",
7350
            "_view_name": "HTMLView",
7351
            "description": "",
7352
            "description_tooltip": null,
7353
            "layout": "IPY_MODEL_d6d2c07d5f46410dbab13d86741f2be7",
7354
            "placeholder": "​",
7355
            "style": "IPY_MODEL_cab0297928774994b780ec733557af07",
7356
            "value": "Map: 100%"
7357
          }
7358
        },
7359
        "63640367c9494489a91988bb7c22b2b7": {
7360
          "model_module": "@jupyter-widgets/controls",
7361
          "model_name": "FloatProgressModel",
7362
          "model_module_version": "1.5.0",
7363
          "state": {
7364
            "_dom_classes": [],
7365
            "_model_module": "@jupyter-widgets/controls",
7366
            "_model_module_version": "1.5.0",
7367
            "_model_name": "FloatProgressModel",
7368
            "_view_count": null,
7369
            "_view_module": "@jupyter-widgets/controls",
7370
            "_view_module_version": "1.5.0",
7371
            "_view_name": "ProgressView",
7372
            "bar_style": "success",
7373
            "description": "",
7374
            "description_tooltip": null,
7375
            "layout": "IPY_MODEL_8b4d9c95b13c4c21bdd1c0295fc2be49",
7376
            "max": 50,
7377
            "min": 0,
7378
            "orientation": "horizontal",
7379
            "style": "IPY_MODEL_e246490ae7b14adbaa0e7a45fd177721",
7380
            "value": 50
7381
          }
7382
        },
7383
        "177e85135bd644c5b6f283237dd09361": {
7384
          "model_module": "@jupyter-widgets/controls",
7385
          "model_name": "HTMLModel",
7386
          "model_module_version": "1.5.0",
7387
          "state": {
7388
            "_dom_classes": [],
7389
            "_model_module": "@jupyter-widgets/controls",
7390
            "_model_module_version": "1.5.0",
7391
            "_model_name": "HTMLModel",
7392
            "_view_count": null,
7393
            "_view_module": "@jupyter-widgets/controls",
7394
            "_view_module_version": "1.5.0",
7395
            "_view_name": "HTMLView",
7396
            "description": "",
7397
            "description_tooltip": null,
7398
            "layout": "IPY_MODEL_fced4d6e50b44fee950d1436e5618f82",
7399
            "placeholder": "​",
7400
            "style": "IPY_MODEL_12d7befb6aa644c7acad49e4e1c93167",
7401
            "value": " 50/50 [00:00&lt;00:00, 603.46 examples/s]"
7402
          }
7403
        },
7404
        "9b5c5002cf264eb899c48390494d167a": {
7405
          "model_module": "@jupyter-widgets/base",
7406
          "model_name": "LayoutModel",
7407
          "model_module_version": "1.2.0",
7408
          "state": {
7409
            "_model_module": "@jupyter-widgets/base",
7410
            "_model_module_version": "1.2.0",
7411
            "_model_name": "LayoutModel",
7412
            "_view_count": null,
7413
            "_view_module": "@jupyter-widgets/base",
7414
            "_view_module_version": "1.2.0",
7415
            "_view_name": "LayoutView",
7416
            "align_content": null,
7417
            "align_items": null,
7418
            "align_self": null,
7419
            "border": null,
7420
            "bottom": null,
7421
            "display": null,
7422
            "flex": null,
7423
            "flex_flow": null,
7424
            "grid_area": null,
7425
            "grid_auto_columns": null,
7426
            "grid_auto_flow": null,
7427
            "grid_auto_rows": null,
7428
            "grid_column": null,
7429
            "grid_gap": null,
7430
            "grid_row": null,
7431
            "grid_template_areas": null,
7432
            "grid_template_columns": null,
7433
            "grid_template_rows": null,
7434
            "height": null,
7435
            "justify_content": null,
7436
            "justify_items": null,
7437
            "left": null,
7438
            "margin": null,
7439
            "max_height": null,
7440
            "max_width": null,
7441
            "min_height": null,
7442
            "min_width": null,
7443
            "object_fit": null,
7444
            "object_position": null,
7445
            "order": null,
7446
            "overflow": null,
7447
            "overflow_x": null,
7448
            "overflow_y": null,
7449
            "padding": null,
7450
            "right": null,
7451
            "top": null,
7452
            "visibility": null,
7453
            "width": null
7454
          }
7455
        },
7456
        "d6d2c07d5f46410dbab13d86741f2be7": {
7457
          "model_module": "@jupyter-widgets/base",
7458
          "model_name": "LayoutModel",
7459
          "model_module_version": "1.2.0",
7460
          "state": {
7461
            "_model_module": "@jupyter-widgets/base",
7462
            "_model_module_version": "1.2.0",
7463
            "_model_name": "LayoutModel",
7464
            "_view_count": null,
7465
            "_view_module": "@jupyter-widgets/base",
7466
            "_view_module_version": "1.2.0",
7467
            "_view_name": "LayoutView",
7468
            "align_content": null,
7469
            "align_items": null,
7470
            "align_self": null,
7471
            "border": null,
7472
            "bottom": null,
7473
            "display": null,
7474
            "flex": null,
7475
            "flex_flow": null,
7476
            "grid_area": null,
7477
            "grid_auto_columns": null,
7478
            "grid_auto_flow": null,
7479
            "grid_auto_rows": null,
7480
            "grid_column": null,
7481
            "grid_gap": null,
7482
            "grid_row": null,
7483
            "grid_template_areas": null,
7484
            "grid_template_columns": null,
7485
            "grid_template_rows": null,
7486
            "height": null,
7487
            "justify_content": null,
7488
            "justify_items": null,
7489
            "left": null,
7490
            "margin": null,
7491
            "max_height": null,
7492
            "max_width": null,
7493
            "min_height": null,
7494
            "min_width": null,
7495
            "object_fit": null,
7496
            "object_position": null,
7497
            "order": null,
7498
            "overflow": null,
7499
            "overflow_x": null,
7500
            "overflow_y": null,
7501
            "padding": null,
7502
            "right": null,
7503
            "top": null,
7504
            "visibility": null,
7505
            "width": null
7506
          }
7507
        },
7508
        "cab0297928774994b780ec733557af07": {
7509
          "model_module": "@jupyter-widgets/controls",
7510
          "model_name": "DescriptionStyleModel",
7511
          "model_module_version": "1.5.0",
7512
          "state": {
7513
            "_model_module": "@jupyter-widgets/controls",
7514
            "_model_module_version": "1.5.0",
7515
            "_model_name": "DescriptionStyleModel",
7516
            "_view_count": null,
7517
            "_view_module": "@jupyter-widgets/base",
7518
            "_view_module_version": "1.2.0",
7519
            "_view_name": "StyleView",
7520
            "description_width": ""
7521
          }
7522
        },
7523
        "8b4d9c95b13c4c21bdd1c0295fc2be49": {
7524
          "model_module": "@jupyter-widgets/base",
7525
          "model_name": "LayoutModel",
7526
          "model_module_version": "1.2.0",
7527
          "state": {
7528
            "_model_module": "@jupyter-widgets/base",
7529
            "_model_module_version": "1.2.0",
7530
            "_model_name": "LayoutModel",
7531
            "_view_count": null,
7532
            "_view_module": "@jupyter-widgets/base",
7533
            "_view_module_version": "1.2.0",
7534
            "_view_name": "LayoutView",
7535
            "align_content": null,
7536
            "align_items": null,
7537
            "align_self": null,
7538
            "border": null,
7539
            "bottom": null,
7540
            "display": null,
7541
            "flex": null,
7542
            "flex_flow": null,
7543
            "grid_area": null,
7544
            "grid_auto_columns": null,
7545
            "grid_auto_flow": null,
7546
            "grid_auto_rows": null,
7547
            "grid_column": null,
7548
            "grid_gap": null,
7549
            "grid_row": null,
7550
            "grid_template_areas": null,
7551
            "grid_template_columns": null,
7552
            "grid_template_rows": null,
7553
            "height": null,
7554
            "justify_content": null,
7555
            "justify_items": null,
7556
            "left": null,
7557
            "margin": null,
7558
            "max_height": null,
7559
            "max_width": null,
7560
            "min_height": null,
7561
            "min_width": null,
7562
            "object_fit": null,
7563
            "object_position": null,
7564
            "order": null,
7565
            "overflow": null,
7566
            "overflow_x": null,
7567
            "overflow_y": null,
7568
            "padding": null,
7569
            "right": null,
7570
            "top": null,
7571
            "visibility": null,
7572
            "width": null
7573
          }
7574
        },
7575
        "e246490ae7b14adbaa0e7a45fd177721": {
7576
          "model_module": "@jupyter-widgets/controls",
7577
          "model_name": "ProgressStyleModel",
7578
          "model_module_version": "1.5.0",
7579
          "state": {
7580
            "_model_module": "@jupyter-widgets/controls",
7581
            "_model_module_version": "1.5.0",
7582
            "_model_name": "ProgressStyleModel",
7583
            "_view_count": null,
7584
            "_view_module": "@jupyter-widgets/base",
7585
            "_view_module_version": "1.2.0",
7586
            "_view_name": "StyleView",
7587
            "bar_color": null,
7588
            "description_width": ""
7589
          }
7590
        },
7591
        "fced4d6e50b44fee950d1436e5618f82": {
7592
          "model_module": "@jupyter-widgets/base",
7593
          "model_name": "LayoutModel",
7594
          "model_module_version": "1.2.0",
7595
          "state": {
7596
            "_model_module": "@jupyter-widgets/base",
7597
            "_model_module_version": "1.2.0",
7598
            "_model_name": "LayoutModel",
7599
            "_view_count": null,
7600
            "_view_module": "@jupyter-widgets/base",
7601
            "_view_module_version": "1.2.0",
7602
            "_view_name": "LayoutView",
7603
            "align_content": null,
7604
            "align_items": null,
7605
            "align_self": null,
7606
            "border": null,
7607
            "bottom": null,
7608
            "display": null,
7609
            "flex": null,
7610
            "flex_flow": null,
7611
            "grid_area": null,
7612
            "grid_auto_columns": null,
7613
            "grid_auto_flow": null,
7614
            "grid_auto_rows": null,
7615
            "grid_column": null,
7616
            "grid_gap": null,
7617
            "grid_row": null,
7618
            "grid_template_areas": null,
7619
            "grid_template_columns": null,
7620
            "grid_template_rows": null,
7621
            "height": null,
7622
            "justify_content": null,
7623
            "justify_items": null,
7624
            "left": null,
7625
            "margin": null,
7626
            "max_height": null,
7627
            "max_width": null,
7628
            "min_height": null,
7629
            "min_width": null,
7630
            "object_fit": null,
7631
            "object_position": null,
7632
            "order": null,
7633
            "overflow": null,
7634
            "overflow_x": null,
7635
            "overflow_y": null,
7636
            "padding": null,
7637
            "right": null,
7638
            "top": null,
7639
            "visibility": null,
7640
            "width": null
7641
          }
7642
        },
7643
        "12d7befb6aa644c7acad49e4e1c93167": {
7644
          "model_module": "@jupyter-widgets/controls",
7645
          "model_name": "DescriptionStyleModel",
7646
          "model_module_version": "1.5.0",
7647
          "state": {
7648
            "_model_module": "@jupyter-widgets/controls",
7649
            "_model_module_version": "1.5.0",
7650
            "_model_name": "DescriptionStyleModel",
7651
            "_view_count": null,
7652
            "_view_module": "@jupyter-widgets/base",
7653
            "_view_module_version": "1.2.0",
7654
            "_view_name": "StyleView",
7655
            "description_width": ""
7656
          }
7657
        }
7658
      }
7659
    }
7660
  },
7661
  "nbformat": 4,
7662
  "nbformat_minor": 0
7663
}