Switch to side-by-side view

--- a
+++ b/NER Preprocessing and Performance Analysis.ipynb
@@ -0,0 +1,1117 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Parsing Clinical Trial Eligibility Criteria Using Transformers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import json\n",
+    "import pandas as pd\n",
+    "from matplotlib import pyplot as plt\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from shutil import copyfile\n",
+    "import csv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from spacy.lang.en import English\n",
+    "# Blank English pipeline with only the \"sentencizer\" component added to it.\n",
+    "# NOTE(review): create_pipe() followed by add_pipe(<component object>) looks like the spaCy v2\n",
+    "# calling convention; spaCy v3 expects nlp.add_pipe(\"sentencizer\") -- confirm the pinned version.\n",
+    "nlp = English()\n",
+    "sentencizer = nlp.create_pipe(\"sentencizer\")\n",
+    "nlp.add_pipe(sentencizer)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Chia Preprocessing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Chia corpus locations (relative paths)\n",
+    "inputpath = \"chia/chia_with_scope\"  # Brat .ann/.txt pairs are read from here\n",
+    "outputpath = \"chia/chia_bio\"  # per-file BIO output\n",
+    "trainpath = \"chia/trains\"\n",
+    "testpath = \"chia/tests\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1000"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Collect the distinct trial ids: the part of every .ann file name before the first '.' and the first '_'\n",
+    "inputfiles = {\n",
+    "    name.split('.')[0].split('_')[0]\n",
+    "    for name in os.listdir(inputpath)\n",
+    "    if name.endswith('.ann')\n",
+    "}\n",
+    "len(inputfiles)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Entity types to retain; the brackets already allow the list to span several lines\n",
+    "select_types = [\n",
+    "    'Condition', 'Value', 'Drug', 'Procedure', 'Measurement', 'Temporal',\n",
+    "    'Observation', 'Person', 'Mood', 'Device', 'Pregnancy_considerations',\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# convert Brat format into BIO format\n",
+    "def get_annotation_entities(ann_file, select_types=None):\n",
+    "    \"\"\"Read the 'T' (text-bound) annotations of a Brat .ann file.\n",
+    "\n",
+    "    Args:\n",
+    "        ann_file: path of the .ann file, read as UTF-8.\n",
+    "        select_types: optional collection of entity types to keep; None keeps every type.\n",
+    "\n",
+    "    Returns:\n",
+    "        A list of (start, end, type) tuples sorted by (start, end). Only the first and the\n",
+    "        last whitespace-separated offset of an annotation are used, and annotations whose\n",
+    "        end is not greater than their start are skipped.\n",
+    "    \"\"\"\n",
+    "    entities = []\n",
+    "    with open(ann_file, \"r\", encoding=\"utf-8\") as f:\n",
+    "        for line in f:\n",
+    "            if not line.startswith('T'):\n",
+    "                continue\n",
+    "            # second tab-separated field: \"<type> <start> ... <end>\"\n",
+    "            term = line.strip().split('\\t')[1].split()\n",
+    "            if select_types is not None and term[0] not in select_types:\n",
+    "                continue\n",
+    "            start, end = int(term[1]), int(term[-1])\n",
+    "            if end <= start:\n",
+    "                continue\n",
+    "            entities.append((start, end, term[0]))\n",
+    "    return sorted(entities, key=lambda x: (x[0], x[1]))\n",
+    "\n",
+    "# function for handling overlap by keeping the entity with largest text span\n",
+    "def remove_overlap_entities(sorted_entities):\n",
+    "    \"\"\"Collapse overlapping or touching spans in a list sorted by (start, end).\n",
+    "\n",
+    "    A span that starts inside the last kept span replaces it only when it is strictly\n",
+    "    longer; a span that starts exactly where the last kept span ends is fused into it\n",
+    "    (the kept span's type wins); any other span is appended.\n",
+    "    \"\"\"\n",
+    "    kept = []\n",
+    "    for span in sorted_entities:\n",
+    "        if not kept:\n",
+    "            kept.append(span)\n",
+    "            continue\n",
+    "        prev = kept[-1]\n",
+    "        if span[0] > prev[1]:\n",
+    "            # disjoint: start a new kept span\n",
+    "            kept.append(span)\n",
+    "        elif span[0] == prev[1]:\n",
+    "            # touching: extend the previous span to the new end\n",
+    "            kept[-1] = (prev[0], span[1], prev[-1])\n",
+    "        elif span[1] - span[0] > prev[1] - prev[0]:\n",
+    "            # overlapping and longer: the new span takes over\n",
+    "            kept[-1] = span\n",
+    "    return kept\n",
+    "\n",
+    "# inverse index of entity annotations\n",
+    "def entity_dictionary(keep_entities, txt_file, file_id=None):\n",
+    "    \"\"\"Index every token of every kept entity by its character offset in the joined text.\n",
+    "\n",
+    "    Args:\n",
+    "        keep_entities: (start, end, type) tuples whose offsets refer to the joined text.\n",
+    "        txt_file: path of the criteria text file, read as UTF-8.\n",
+    "        file_id: name used to choose the line separator width; defaults to the base name\n",
+    "            of txt_file without its extension, so no notebook-global variable is needed.\n",
+    "\n",
+    "    Returns:\n",
+    "        Dict mapping a token start offset to [token index within its entity, token text, entity type].\n",
+    "    \"\"\"\n",
+    "    if file_id is None:\n",
+    "        file_id = os.path.splitext(os.path.basename(txt_file))[0]\n",
+    "    # 3 trials with inconsistent offsets: their lines are joined with one space instead of two\n",
+    "    separator = ' ' if file_id in ('NCT02348918_exc', 'NCT02348918_inc', 'NCT01735955_exc') else '  '\n",
+    "    with open(txt_file, \"r\", encoding=\"utf-8\") as f:\n",
+    "        # same one-for-one character substitution the BIO writer applies, so both tokenize identical text\n",
+    "        text = separator.join(line.strip().replace('⁄', '/') for line in f)\n",
+    "    f_ann = {}\n",
+    "    for entity in keep_entities:\n",
+    "        term_offset, term_end, term_type = entity[0], entity[1], entity[-1]\n",
+    "        for i, token in enumerate(nlp(text[term_offset:term_end])):\n",
+    "            # token.idx is the token's character offset inside the entity text;\n",
+    "            # the first entry recorded for an offset is kept\n",
+    "            f_ann.setdefault(term_offset + token.idx, [i, token.text, term_type])\n",
+    "    return f_ann\n",
+    "\n",
+    "# Brat -> BIO format conversion\n",
+    "# Each output row is \"<token> <start in line> <end in line> <start in document> <end in document> <label>\";\n",
+    "# a blank row is written after every input line.\n",
+    "for infile in inputfiles:\n",
+    "    for t in [\"exc\", \"inc\"]:\n",
+    "        file = f\"{infile}_{t}\"\n",
+    "        ann_file = f\"{inputpath}/{file}.ann\"\n",
+    "        txt_file = f\"{inputpath}/{file}.txt\"\n",
+    "        out_file = f\"{outputpath}/{file}.bio.txt\"\n",
+    "        sorted_entities = get_annotation_entities(ann_file, select_types)\n",
+    "        keep_entities = remove_overlap_entities(sorted_entities)\n",
+    "        # maps a document offset to [token index within its entity, token text, entity type]\n",
+    "        f_ann = entity_dictionary(keep_entities, txt_file)\n",
+    "        with open(out_file, \"w\", encoding=\"utf-8\") as f_out:\n",
+    "            with open(txt_file, \"r\", encoding=\"utf-8\") as f:\n",
+    "                # document offset of the first character of the current line\n",
+    "                sent_offset = 0\n",
+    "                for line in f:\n",
+    "                    if '⁄' in line:\n",
+    "                        line = line.replace('⁄', '/') # swap the non-ASCII slash for '/'; one character for one, so offsets do not move\n",
+    "                    doc = nlp(line.strip())\n",
+    "                    token_starts = [(i, doc[i:].start_char) for i in range(len(doc))]\n",
+    "                    for token in doc:\n",
+    "                        token_sent_offset = token_starts[token.i][1]\n",
+    "                        token_doc_offset = token_starts[token.i][1]+sent_offset\n",
+    "                        if token_doc_offset in f_ann:\n",
+    "                            # index 0 means this is the first token of its entity\n",
+    "                            if f_ann[token_doc_offset][0] == 0:\n",
+    "                                label = f\"B-{f_ann[token_doc_offset][2]}\"\n",
+    "                            else:\n",
+    "                                label = f\"I-{f_ann[token_doc_offset][2]}\"\n",
+    "                        else:\n",
+    "                            label = f\"O\"\n",
+    "                        f_out.write(f\"{token.text} {token_sent_offset} {token_sent_offset+len(token.text)} {token_doc_offset} {token_doc_offset+len(token.text)} {label}\\n\")\n",
+    "                    f_out.write('\\n')\n",
+    "                    # advance past the stripped line plus its separator (1 character for the listed trials, 2 otherwise)\n",
+    "                    if file in ['NCT02348918_exc', 'NCT02348918_inc', 'NCT01735955_exc']: # 3 trials with inconsistent offsets\n",
+    "                        sent_offset += (len(line.strip())+1)\n",
+    "                    else:\n",
+    "                        sent_offset += (len(line.strip())+2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "800 100 100\n"
+     ]
+    }
+   ],
+   "source": [
+    "# dataset separation: 800 trials (80%) for training, 100 trials (10%) for validation and 100 trials (10%) for testing\n",
+    "# inputfiles is a set of strings, whose iteration order can differ between interpreter runs;\n",
+    "# sorting first makes random_state=13 produce the same split after every kernel restart\n",
+    "trial_ids = sorted(inputfiles)\n",
+    "train_ids, dev_ids = train_test_split(trial_ids, train_size=0.8, random_state=13, shuffle=True)\n",
+    "dev_ids, test_ids = train_test_split(dev_ids, train_size=0.5, random_state=13, shuffle=True)\n",
+    "print(len(train_ids), len(dev_ids), len(test_ids))\n",
+    "chia_datasets = {\"train\":train_ids, \"dev\":dev_ids, \"test\":test_ids}\n",
+    "# context manager so the JSON file is flushed and closed right away\n",
+    "with open(\"chia/chia_datasets.json\", \"w\", encoding=\"utf-8\") as f_json:\n",
+    "    json.dump(chia_datasets, f_json)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Merge BIO format train, validation and test datasets\n",
+    "# chia_datasets = json.load(open(\"chia/chia_datasets.json\", \"r\", encoding=\"utf-8\"))\n",
+    "def merge_chia_split(fids, merged_file, copy_dir, bio_dir):\n",
+    "    \"\"\"Copy each trial's exc/inc BIO files into copy_dir and concatenate them into merged_file.\n",
+    "\n",
+    "    Args:\n",
+    "        fids: trial ids of the split.\n",
+    "        merged_file: path of the merged output file (overwritten).\n",
+    "        copy_dir: directory receiving a copy of every per-trial BIO file.\n",
+    "        bio_dir: directory holding the per-trial \"<id>_exc.bio.txt\" / \"<id>_inc.bio.txt\" files.\n",
+    "\n",
+    "    Files that are empty after stripping are copied but add nothing to merged_file;\n",
+    "    every other file is followed by one blank line.\n",
+    "    \"\"\"\n",
+    "    with open(merged_file, \"w\", encoding=\"utf-8\") as f:\n",
+    "        for fid in fids:\n",
+    "            names = [f\"{fid}_exc.bio.txt\", f\"{fid}_inc.bio.txt\"]\n",
+    "            for name in names:\n",
+    "                copyfile(f\"{bio_dir}/{name}\", f\"{copy_dir}/{name}\")\n",
+    "            for name in names:\n",
+    "                with open(f\"{bio_dir}/{name}\", \"r\", encoding=\"utf-8\") as fr:\n",
+    "                    txt = fr.read().strip()\n",
+    "                if txt != '':\n",
+    "                    f.write(txt)\n",
+    "                    f.write(\"\\n\\n\")\n",
+    "\n",
+    "# merge the train dataset\n",
+    "merge_chia_split(chia_datasets[\"train\"], \"chia/train.txt\", trainpath, outputpath)\n",
+    "# merge the validation dataset\n",
+    "# NOTE(review): the validation files are copied into trainpath, exactly as before -- confirm this is intended\n",
+    "merge_chia_split(chia_datasets[\"dev\"], \"chia/dev.txt\", trainpath, outputpath)\n",
+    "# merge the test dataset\n",
+    "merge_chia_split(chia_datasets[\"test\"], \"chia/test.txt\", testpath, outputpath)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# convert Chia in Brat into format for Att-BiLSTM-CRF model:\n",
+    "# one \"<file>\\t<start>:<end>:<type>,...\\t<line text>\" row for every line that holds at least one entity\n",
+    "out_file = \"chia/chia_ner.tsv\"\n",
+    "with open(out_file, \"w\", encoding=\"utf-8\") as f_out:\n",
+    "    for infile in inputfiles:\n",
+    "        for t in [\"exc\", \"inc\"]:\n",
+    "            file = f\"{infile}_{t}\"\n",
+    "            ann_file = f\"{inputpath}/{file}.ann\"\n",
+    "            txt_file = f\"{inputpath}/{file}.txt\"\n",
+    "            sorted_entities = get_annotation_entities(ann_file, select_types)\n",
+    "            keep_entities = remove_overlap_entities(sorted_entities)\n",
+    "            # the three listed trials advance by one separator character per line, every other file by two\n",
+    "            gap = 1 if file in ['NCT02348918_exc', 'NCT02348918_inc', 'NCT01735955_exc'] else 2\n",
+    "            with open(txt_file, \"r\", encoding=\"utf-8\") as f:\n",
+    "                sent_offset = 0\n",
+    "                for line in f:\n",
+    "                    line = line.replace('⁄', '/')\n",
+    "                    stripped = line.strip()\n",
+    "                    sent_end = sent_offset + len(line)\n",
+    "                    sent_ents = []\n",
+    "                    for ent in keep_entities:\n",
+    "                        # starts (or ends) before this line: already handled\n",
+    "                        if ent[0] < sent_offset or ent[1] < sent_offset:\n",
+    "                            continue\n",
+    "                        # starts after this line, or runs past its stripped text: stop scanning\n",
+    "                        if ent[0] >= sent_end or ent[1] > sent_offset + len(stripped):\n",
+    "                            break\n",
+    "                        # offsets relative to the line, shifted by one\n",
+    "                        sent_ents.append(f\"{ent[0]-sent_offset+1}:{ent[1]-sent_offset+1}:{ent[2].lower()}\")\n",
+    "                    if sent_ents:\n",
+    "                        f_out.write(f\"{file}\\t{','.join(sent_ents)}\\t{stripped}\\n\")\n",
+    "                    sent_offset += len(stripped) + gap"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# split Chia in format for Att-BiLSTM-CRF model into train, validation and test datasets\n",
+    "# chia_datasets = json.load(open(\"chia/chia_datasets.json\", \"r\", encoding=\"utf-8\"))\n",
+    "# sets give constant-time membership tests instead of scanning the id lists for every row\n",
+    "train_trials = set(chia_datasets[\"train\"])\n",
+    "dev_trials = set(chia_datasets[\"dev\"])\n",
+    "with open(\"chia/chia_ner_train.tsv\", \"w\", encoding=\"utf-8\") as ftrain, \\\n",
+    "     open(\"chia/chia_ner_dev.tsv\", \"w\", encoding=\"utf-8\") as fdev, \\\n",
+    "     open(\"chia/chia_ner_test.tsv\", \"w\", encoding=\"utf-8\") as ftest, \\\n",
+    "     open(\"chia/chia_ner.tsv\", \"r\", encoding=\"utf-8\") as fread:\n",
+    "    for line in fread:\n",
+    "        # the first column is \"<trial id>_<exc|inc>\"; keep the part before the first underscore\n",
+    "        trial_id = line.split('\\t', 1)[0].split(\"_\")[0]\n",
+    "        if trial_id in train_trials:\n",
+    "            ftrain.write(line)\n",
+    "        elif trial_id in dev_trials:\n",
+    "            fdev.write(line)\n",
+    "        else:\n",
+    "            # every row that is neither train nor dev goes to the test file\n",
+    "            ftest.write(line)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Facebook Research Data (FRD) Preprocessing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# FRD locations; outputpath, trainpath and testpath are re-bound from their Chia values here\n",
+    "fbnerfile = \"fbner/medical_ner.tsv\"\n",
+    "outputpath = \"fbner/fb_bio\"\n",
+    "trainpath = \"fbner/trains\"\n",
+    "testpath = \"fbner/tests\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# loading FRD data: first field is the document id, second the comma-separated\n",
+    "# \"a:b:...\" entity specs, last the sentence text\n",
+    "fbner = {}\n",
+    "with open(fbnerfile, \"r\", encoding='utf-8') as f:\n",
+    "    for line in f:\n",
+    "        line = line.strip().split('\\t')\n",
+    "        ents = [ent.split(':') for ent in line[1].split(',')]\n",
+    "        # sentences of one document are collected in file order\n",
+    "        fbner.setdefault(line[0], []).append({'text':line[-1], 'entities':ents})\n",
+    "# context manager so the JSON file is flushed and closed right away\n",
+    "with open(\"./fbner/medical_ner.json\", \"w\", encoding=\"utf-8\") as f_json:\n",
+    "    json.dump(fbner, f_json)\n",
+    "inputfiles = list(fbner.keys())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# convert FRD into BIO format\n",
+    "# One \"<id>.bio.txt\" file per document; each row is\n",
+    "# \"<token> <start in sentence> <end in sentence> <start in document> <end in document> <label>\".\n",
+    "for k, v in fbner.items():\n",
+    "    out_file = f\"{outputpath}/{k}.bio.txt\"\n",
+    "    with open(out_file, \"w\", encoding=\"utf-8\") as f_out:\n",
+    "        # running offset of the current sentence within the document\n",
+    "        sent_offset = 0\n",
+    "        for sent in v:\n",
+    "            sent_text = sent['text']\n",
+    "            f_ann = {}\n",
+    "            # the entity location dictionary: document offset -> [token index within entity, token text, entity type]\n",
+    "            for ent in sent['entities']:\n",
+    "                # the stored offsets are shifted by one relative to Python slicing, hence the -1 on both ends\n",
+    "                entity_text = sent_text[int(ent[0])-1:int(ent[1])-1]\n",
+    "                doc = nlp(entity_text)\n",
+    "                token_starts = [(i, doc[i:].start_char) for i in range(len(doc))]\n",
+    "                term_type = ent[-1]\n",
+    "                term_offset = int(ent[0])-1+sent_offset\n",
+    "                for i, token in enumerate(doc):\n",
+    "                    ann_offset = token_starts[i][1]+term_offset\n",
+    "                    # the first entity recorded for an offset is kept\n",
+    "                    if ann_offset not in f_ann:\n",
+    "                        f_ann[ann_offset] = [i, token.text, term_type]\n",
+    "            # convert to bio format\n",
+    "            doc = nlp(sent_text)\n",
+    "            token_starts = [(i, doc[i:].start_char) for i in range(len(doc))]\n",
+    "            for token in doc:\n",
+    "                token_sent_offset = token_starts[token.i][1]\n",
+    "                token_doc_offset = token_starts[token.i][1]+sent_offset\n",
+    "                if token_doc_offset in f_ann:\n",
+    "                    # index 0 means this is the first token of its entity\n",
+    "                    if f_ann[token_doc_offset][0] == 0:\n",
+    "                        label = f\"B-{f_ann[token_doc_offset][2]}\"\n",
+    "                    else:\n",
+    "                        label = f\"I-{f_ann[token_doc_offset][2]}\"\n",
+    "                else:\n",
+    "                    label = f\"O\"\n",
+    "                f_out.write(f\"{token.text} {token_sent_offset} {token_sent_offset+len(token.text)} {token_doc_offset} {token_doc_offset+len(token.text)} {label}\\n\")\n",
+    "            # blank row between sentences\n",
+    "            f_out.write('\\n')\n",
+    "            # sentences are treated as one character apart in the document offsets\n",
+    "            sent_offset += (len(sent_text)+1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2651 331 332\n"
+     ]
+    }
+   ],
+   "source": [
+    "# split FRD into train, validation and test datasets\n",
+    "train_ids, dev_ids = train_test_split(inputfiles, train_size=0.8, random_state=13, shuffle=True)\n",
+    "dev_ids, test_ids = train_test_split(dev_ids, train_size=0.5, random_state=13, shuffle=True)\n",
+    "print(len(train_ids), len(dev_ids), len(test_ids))\n",
+    "fbner_datasets = {\"train\":train_ids, \"dev\":dev_ids, \"test\":test_ids}\n",
+    "# context manager so the JSON file is flushed and closed right away\n",
+    "with open(\"fbner/fbner_datasets.json\", \"w\", encoding=\"utf-8\") as f_json:\n",
+    "    json.dump(fbner_datasets, f_json)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Merge BIO format train, validation and test datasets\n",
+    "# fbner_datasets = json.load(open(\"fbner/fbner_datasets.json\", \"r\", encoding=\"utf-8\"))\n",
+    "def merge_fbner_split(fids, merged_file, copy_dir, bio_dir):\n",
+    "    \"\"\"Copy each document's BIO file into copy_dir and concatenate them into merged_file.\n",
+    "\n",
+    "    Args:\n",
+    "        fids: document ids of the split.\n",
+    "        merged_file: path of the merged output file (overwritten).\n",
+    "        copy_dir: directory receiving a copy of every per-document BIO file.\n",
+    "        bio_dir: directory holding the per-document \"<id>.bio.txt\" files.\n",
+    "\n",
+    "    Files that are empty after stripping are copied but add nothing to merged_file;\n",
+    "    every other file is followed by one blank line.\n",
+    "    \"\"\"\n",
+    "    with open(merged_file, \"w\", encoding=\"utf-8\") as f:\n",
+    "        for fid in fids:\n",
+    "            copyfile(f\"{bio_dir}/{fid}.bio.txt\", f\"{copy_dir}/{fid}.bio.txt\")\n",
+    "            with open(f\"{bio_dir}/{fid}.bio.txt\", \"r\", encoding=\"utf-8\") as fr:\n",
+    "                txt = fr.read().strip()\n",
+    "            if txt != '':\n",
+    "                f.write(txt)\n",
+    "                f.write(\"\\n\\n\")\n",
+    "\n",
+    "# merge the train dataset\n",
+    "merge_fbner_split(fbner_datasets[\"train\"], \"fbner/train.txt\", trainpath, outputpath)\n",
+    "# merge the validation dataset\n",
+    "# NOTE(review): the validation files are copied into trainpath, exactly as before -- confirm this is intended\n",
+    "merge_fbner_split(fbner_datasets[\"dev\"], \"fbner/dev.txt\", trainpath, outputpath)\n",
+    "# merge the test dataset\n",
+    "merge_fbner_split(fbner_datasets[\"test\"], \"fbner/test.txt\", testpath, outputpath)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# resplit the processed data\n",
+    "# fbner_datasets = json.load(open(\"fbner/fbner_datasets.json\", \"r\", encoding=\"utf-8\"))\n",
+    "\n",
+    "# sets give constant-time membership tests instead of scanning the id lists for every row\n",
+    "train_docs = set(fbner_datasets[\"train\"])\n",
+    "dev_docs = set(fbner_datasets[\"dev\"])\n",
+    "with open(\"fbner/fbner_ner_train.tsv\", \"w\", encoding=\"utf-8\") as ftrain, \\\n",
+    "     open(\"fbner/fbner_ner_dev.tsv\", \"w\", encoding=\"utf-8\") as fdev, \\\n",
+    "     open(\"fbner/fbner_ner_test.tsv\", \"w\", encoding=\"utf-8\") as ftest, \\\n",
+    "     open(\"fbner/medical_ner.tsv\", \"r\", encoding=\"utf-8\") as fread:\n",
+    "    for line in fread:\n",
+    "        # the first column is the document id\n",
+    "        doc_id = line.split('\\t', 1)[0]\n",
+    "        if doc_id in train_docs:\n",
+    "            ftrain.write(line)\n",
+    "        elif doc_id in dev_docs:\n",
+    "            fdev.write(line)\n",
+    "        else:\n",
+    "            # every row that is neither train nor dev goes to the test file\n",
+    "            ftest.write(line)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Att-BiLSTM-CRF Model Performance Analysis"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def abc_strict_match(gs, pred, s_idx, e_idx, ent_type):\n",
+    "    \"\"\"Return True when gs and pred agree on [s_idx, e_idx) and neither sequence\n",
+    "    carries ent_type on the position just before or just after that range.\n",
+    "\n",
+    "    The position before is only inspected when s_idx is not 0, the position after\n",
+    "    only when e_idx is inside gs.\n",
+    "    \"\"\"\n",
+    "    # the span must not continue to the left in either sequence\n",
+    "    if s_idx != 0 and (gs[s_idx-1] == ent_type or pred[s_idx-1] == ent_type):\n",
+    "        return False\n",
+    "    # every position inside the span must carry the same label in both sequences\n",
+    "    if any(gs[pos] != pred[pos] for pos in range(s_idx, e_idx)):\n",
+    "        return False\n",
+    "    # the span must not continue to the right in either sequence\n",
+    "    if e_idx < len(gs) and (gs[e_idx] == ent_type or pred[e_idx] == ent_type):\n",
+    "        return False\n",
+    "    return True\n",
+    "\n",
+    "def abc_relax_match(gs, pred, s_idx, e_idx, ent_type):\n",
+    "    \"\"\"Return True when at least one position in [s_idx, e_idx) carries ent_type in both gs and pred.\"\"\"\n",
+    "    return any(gs[pos] == pred[pos] == ent_type for pos in range(s_idx, e_idx))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset = 'chia' # 'chia' or 'frd'\n",
+    "outfolder = \"attbilstmcrf\"\n",
+    "outfile = f\"{dataset}_attbilstmcrf_results.txt\"\n",
+    "# label names per dataset\n",
+    "# NOTE(review): the evaluation cell indexes this list with (category id - 1), so the order presumably has to match the model's ids -- confirm\n",
+    "labels_dict = {\n",
+    "    'chia': [\n",
+    "        'Mood', 'Condition', 'Procedure', 'Measurement', 'Value', 'Drug',\n",
+    "        'Temporal', 'Observation', 'Pregnancy', 'Person', 'Device',\n",
+    "    ],\n",
+    "    'frd': [\n",
+    "        'chronic_disease', 'treatment', 'upper_bound', 'pregnancy', 'clinical_variable',\n",
+    "        'lower_bound', 'cancer', 'age', 'language_fluency', 'gender',\n",
+    "        'contraception_consent', 'technology_access', 'allergy_name', 'bmi', 'ethnicity',\n",
+    "    ],\n",
+    "}\n",
+    "labels = labels_dict[dataset]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Tally token-level agreement and entity-level strict / relax / miss counts from the model's result file.\n",
+    "# \"category\" is keyed by gold-standard entities, \"prediction\" by predicted entities.\n",
+    "eval_metrics = {\"category\":{},\"overall\":{}, \"prediction\":{}}\n",
+    "with open(f\"{outfolder}/{outfile}\", \"r\", encoding=\"utf-8\") as f:\n",
+    "    # the first line is skipped (presumably a header -- confirm against the result file)\n",
+    "    next(f)\n",
+    "    for line in f:\n",
+    "        # NOTE(review): eval() executes whatever the file contains; if both columns are plain Python\n",
+    "        # literals, ast.literal_eval would be the safe replacement -- confirm the file format first\n",
+    "        # column 1 holds the gold-standard sequence, column 0 the predicted sequence\n",
+    "        gs = eval(line.split('\\t')[1])\n",
+    "        pred = eval(line.split('\\t')[0])\n",
+    "        # token-level agreement\n",
+    "        for i in zip(gs, pred):\n",
+    "            if i[0] == i[1]: eval_metrics[\"overall\"][\"acc_true\"] = eval_metrics[\"overall\"].get(\"acc_true\", 0) + 1\n",
+    "            else: eval_metrics[\"overall\"][\"acc_false\"] = eval_metrics[\"overall\"].get(\"acc_false\", 0) + 1\n",
+    "        llen = len(gs)\n",
+    "        # pass 1: walk the gold sequence; a run of identical non-zero ids counts as one entity\n",
+    "        cur_idx = 0\n",
+    "        while cur_idx < llen:\n",
+    "            if gs[cur_idx] == 0:\n",
+    "                cur_idx += 1\n",
+    "            else:\n",
+    "                start_idx = cur_idx\n",
+    "                end_idx = start_idx + 1\n",
+    "                cate = gs[start_idx]\n",
+    "                while end_idx < llen and gs[end_idx] == cate:\n",
+    "                    end_idx += 1\n",
+    "                # gold entity totals, overall and per label (ids are mapped to names with cate-1)\n",
+    "                eval_metrics[\"overall\"]['gs'] = eval_metrics[\"overall\"].get('gs', {})\n",
+    "                eval_metrics[\"overall\"]['gs']['count'] = eval_metrics[\"overall\"]['gs'].get('count', 0) + 1\n",
+    "                eval_metrics[\"overall\"]['gs'][labels[cate-1]] = eval_metrics[\"overall\"]['gs'].get(labels[cate-1], 0) + 1\n",
+    "                if abc_strict_match(gs, pred, start_idx, end_idx, cate):\n",
+    "                    eval_metrics[\"overall\"][\"strict_predicted\"] = eval_metrics[\"overall\"].get(\"strict_predicted\", 0) + 1\n",
+    "                    eval_metrics[\"category\"][labels[cate-1]] = eval_metrics[\"category\"].get(labels[cate-1], {\"strict\":0, \"relax\":0, \"miss\":0})\n",
+    "                    eval_metrics[\"category\"][labels[cate-1]][\"strict\"] += 1\n",
+    "                elif abc_relax_match(gs, pred, start_idx, end_idx, cate):\n",
+    "                    eval_metrics[\"overall\"][\"relax_predicted\"] = eval_metrics[\"overall\"].get(\"relax_predicted\", 0) + 1\n",
+    "                    eval_metrics[\"category\"][labels[cate-1]] = eval_metrics[\"category\"].get(labels[cate-1], {\"strict\":0, \"relax\":0, \"miss\":0})\n",
+    "                    eval_metrics[\"category\"][labels[cate-1]][\"relax\"] += 1\n",
+    "                else:\n",
+    "                    eval_metrics[\"overall\"][\"miss_predicted\"] = eval_metrics[\"overall\"].get(\"miss_predicted\", 0) + 1\n",
+    "                    eval_metrics[\"category\"][labels[cate-1]] = eval_metrics[\"category\"].get(labels[cate-1], {\"strict\":0, \"relax\":0, \"miss\":0})\n",
+    "                    eval_metrics[\"category\"][labels[cate-1]][\"miss\"] += 1\n",
+    "                cur_idx = end_idx\n",
+    "        # pass 2: the same walk over the predicted sequence\n",
+    "        cur_idx = 0\n",
+    "        while cur_idx < llen:\n",
+    "            if pred[cur_idx] == 0:\n",
+    "                cur_idx += 1\n",
+    "            else:\n",
+    "                start_idx = cur_idx\n",
+    "                end_idx = start_idx + 1\n",
+    "                cate = pred[start_idx]\n",
+    "                while end_idx < llen and pred[end_idx] == cate:\n",
+    "                    end_idx += 1\n",
+    "                if abc_strict_match(gs, pred, start_idx, end_idx, cate):\n",
+    "                    eval_metrics[\"overall\"][\"strict_predict\"] = eval_metrics[\"overall\"].get(\"strict_predict\", 0) + 1\n",
+    "                    eval_metrics[\"prediction\"][labels[cate-1]] = eval_metrics[\"prediction\"].get(labels[cate-1], {\"strict\":0, \"relax\":0, \"miss\":0})\n",
+    "                    eval_metrics[\"prediction\"][labels[cate-1]][\"strict\"] += 1\n",
+    "                elif abc_relax_match(gs, pred, start_idx, end_idx, cate):\n",
+    "                    eval_metrics[\"overall\"][\"relax_predict\"] = eval_metrics[\"overall\"].get(\"relax_predict\", 0) + 1\n",
+    "                    eval_metrics[\"prediction\"][labels[cate-1]] = eval_metrics[\"prediction\"].get(labels[cate-1], {\"strict\":0, \"relax\":0, \"miss\":0})\n",
+    "                    eval_metrics[\"prediction\"][labels[cate-1]][\"relax\"] += 1\n",
+    "                else:\n",
+    "                    eval_metrics[\"overall\"][\"miss_predict\"] = eval_metrics[\"overall\"].get(\"miss_predict\", 0) + 1\n",
+    "                    eval_metrics[\"prediction\"][labels[cate-1]] = eval_metrics[\"prediction\"].get(labels[cate-1], {\"strict\":0, \"relax\":0, \"miss\":0})\n",
+    "                    eval_metrics[\"prediction\"][labels[cate-1]][\"miss\"] += 1\n",
+    "                cur_idx = end_idx"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Overall Relax Level: Precision: 0.706389088298636, Recall: 0.734399375975039, F1: 0.7201219589214525\n",
+      "Overall Strict Level: Precision: 0.35857860732232594, Recall: 0.38962558502340094, F1: 0.3734579439252337\n",
+      "\n",
+      "\n",
+      "Relax Level for Condition: Precision: 0.8081587651598677, Recall: 0.8273045507584598, F1: 0.8176195915182294\n",
+      "Strict Level for Condition: Precision: 0.43439911797133407, Recall: 0.4597432905484247, F1: 0.4467120181405896\n",
+      "\n",
+      "\n",
+      "Relax Level for Procedure: Precision: 0.6728110599078341, Recall: 0.4857142857142857, F1: 0.5641550176156381\n",
+      "Strict Level for Procedure: Precision: 0.2626728110599078, Recall: 0.20357142857142857, F1: 0.22937625754527163\n",
+      "\n",
+      "\n",
+      "Relax Level for Temporal: Precision: 0.5663956639566395, Recall: 0.853448275862069, F1: 0.6809049773755657\n",
+      "Strict Level for Temporal: Precision: 0.2601626016260163, Recall: 0.41379310344827586, F1: 0.3194675540765391\n",
+      "\n",
+      "\n",
+      "Relax Level for Pregnancy: Precision: 0.38596491228070173, Recall: 0.5555555555555556, F1: 0.45548654244306414\n",
+      "Strict Level for Pregnancy: Precision: 0.03508771929824561, Recall: 0.1111111111111111, F1: 0.05333333333333334\n",
+      "\n",
+      "\n",
+      "Relax Level for Observation: Precision: 0.5342465753424658, Recall: 0.42011834319526625, F1: 0.47035841685068797\n",
+      "Strict Level for Observation: Precision: 0.18493150684931506, Recall: 0.15976331360946747, F1: 0.17142857142857143\n",
+      "\n",
+      "\n",
+      "Relax Level for Drug: Precision: 0.6677115987460815, Recall: 0.8167330677290837, F1: 0.7347422975315082\n",
+      "Strict Level for Drug: Precision: 0.2884012539184953, Recall: 0.3665338645418327, F1: 0.32280701754385965\n",
+      "\n",
+      "\n",
+      "Relax Level for Person: Precision: 0.7818181818181819, Recall: 0.7589285714285714, F1: 0.7702033505426193\n",
+      "Strict Level for Person: Precision: 0.6363636363636364, Recall: 0.625, F1: 0.6306306306306306\n",
+      "\n",
+      "\n",
+      "Relax Level for Value: Precision: 0.7839506172839507, Recall: 0.7961783439490446, F1: 0.790017168877056\n",
+      "Strict Level for Value: Precision: 0.5030864197530864, Recall: 0.5191082802547771, F1: 0.5109717868338558\n",
+      "\n",
+      "\n",
+      "Relax Level for Measurement: Precision: 0.7153284671532847, Recall: 0.735632183908046, F1: 0.7253382676072626\n",
+      "Strict Level for Measurement: Precision: 0.3467153284671533, Recall: 0.36398467432950193, F1: 0.3551401869158879\n",
+      "\n",
+      "\n",
+      "Relax Level for Mood: Precision: 0.3888888888888889, Recall: 0.30434782608695654, F1: 0.34146341463414637\n",
+      "Strict Level for Mood: Precision: 0.05555555555555555, Recall: 0.043478260869565216, F1: 0.04878048780487805\n",
+      "\n",
+      "\n",
+      "Relax Level for Device: Precision: 0.6296296296296297, Recall: 0.5416666666666666, F1: 0.5823451910408431\n",
+      "Strict Level for Device: Precision: 0.037037037037037035, Recall: 0.041666666666666664, F1: 0.03921568627450981\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Turn the raw match counts collected in eval_metrics into accuracy, precision,\n",
+    "# recall and F1 (overall and per entity type), print them and save them as JSON.\n",
+    "# eval_metrics, outfolder and outfile are not defined in this cell.\n",
+    "attbc_metrics = {\"category\":{\"relax\":{}, \"strict\":{}},\"overall\":{}}\n",
+    "attbc_metrics[\"overall\"][\"acc\"] = eval_metrics[\"overall\"][\"acc_true\"]/(eval_metrics[\"overall\"][\"acc_true\"]+eval_metrics[\"overall\"][\"acc_false\"])\n",
+    "pred_all = eval_metrics[\"overall\"]['strict_predict'] + eval_metrics[\"overall\"]['relax_predict'] + eval_metrics[\"overall\"]['miss_predict']\n",
+    "pre_relax_all = (eval_metrics[\"overall\"]['strict_predict'] + eval_metrics[\"overall\"]['relax_predict'])/ pred_all\n",
+    "rec_relax_all = (eval_metrics[\"overall\"]['strict_predicted'] + eval_metrics[\"overall\"]['relax_predicted'])/ eval_metrics[\"overall\"]['gs']['count']\n",
+    "# F1 is undefined when precision and recall are both zero: report 0.0 instead of raising\n",
+    "f1_relax_all = (2*pre_relax_all*rec_relax_all)/(pre_relax_all+rec_relax_all) if (pre_relax_all+rec_relax_all) != 0 else 0.0\n",
+    "print(f\"Overall Relax Level: Precision: {pre_relax_all}, Recall: {rec_relax_all}, F1: {f1_relax_all}\")\n",
+    "attbc_metrics[\"overall\"][\"relax\"] = {\"f_score\": f1_relax_all, \"precision\":pre_relax_all, \"recall\":rec_relax_all}\n",
+    "\n",
+    "pre_strict_all = eval_metrics[\"overall\"]['strict_predict'] / pred_all\n",
+    "rec_strict_all = eval_metrics[\"overall\"]['strict_predicted'] / eval_metrics[\"overall\"]['gs']['count']\n",
+    "f1_strict_all = (2*pre_strict_all*rec_strict_all)/(pre_strict_all+rec_strict_all) if (pre_strict_all+rec_strict_all) != 0 else 0.0\n",
+    "print(f\"Overall Strict Level: Precision: {pre_strict_all}, Recall: {rec_strict_all}, F1: {f1_strict_all}\")\n",
+    "print('\\n')\n",
+    "attbc_metrics[\"overall\"][\"strict\"] = {\"f_score\": f1_strict_all, \"precision\":pre_strict_all, \"recall\":rec_strict_all}\n",
+    "\n",
+    "for i in eval_metrics[\"category\"].keys():\n",
+    "    # tt: denominator for recall (the 'gs' count stored for type i)\n",
+    "    tt = eval_metrics[\"overall\"]['gs'][i]\n",
+    "    # a type listed under \"category\" may have no entry under \"prediction\": fall back to zero counts\n",
+    "    pred_counts = eval_metrics[\"prediction\"].get(i, {\"strict\":0, \"relax\":0, \"miss\":0})\n",
+    "    # tp: denominator for precision (all strict + relax + miss entries of type i)\n",
+    "    tp = pred_counts['strict'] + pred_counts['relax'] + pred_counts['miss']\n",
+    "\n",
+    "    pre_relax = (pred_counts['strict']+pred_counts['relax'])/tp if tp != 0 else 0.0\n",
+    "    rec_relax = (eval_metrics[\"category\"][i]['strict']+eval_metrics[\"category\"][i]['relax'])/tt\n",
+    "    f1_relax = (2*pre_relax*rec_relax)/(pre_relax+rec_relax) if (pre_relax+rec_relax) != 0 else 0.0\n",
+    "    print(f\"Relax Level for {i}: Precision: {pre_relax}, Recall: {rec_relax}, F1: {f1_relax}\")\n",
+    "    attbc_metrics[\"category\"][\"relax\"][i] = {\"f_score\": f1_relax, \"precision\":pre_relax, \"recall\":rec_relax}\n",
+    "\n",
+    "    pre_strict = pred_counts['strict']/tp if tp != 0 else 0.0\n",
+    "    rec_strict = eval_metrics[\"category\"][i]['strict']/tt\n",
+    "    f1_strict = (2*pre_strict*rec_strict)/(pre_strict+rec_strict) if (pre_strict+rec_strict) != 0 else 0.0\n",
+    "    print(f\"Strict Level for {i}: Precision: {pre_strict}, Recall: {rec_strict}, F1: {f1_strict}\")\n",
+    "    print('\\n')\n",
+    "    attbc_metrics[\"category\"][\"strict\"][i] = {\"f_score\": f1_strict, \"precision\":pre_strict, \"recall\":rec_strict}\n",
+    "# write through a context manager so the output file is flushed and closed deterministically\n",
+    "with open(f\"{outfolder}/{outfile}.json\", \"w\", encoding=\"utf-8\") as f:\n",
+    "    json.dump(attbc_metrics, f, indent=4)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Transformer-based Model Performance and Error Analysis"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# strict matching function\n",
+    "def bio_strict_match(gs, pred, s_idx, e_idx, en_type):\n",
+    "    \"\"\"Return True when the span [s_idx, e_idx) is an exact entity match.\n",
+    "\n",
+    "    An exact match requires that the gold-standard (gs) and predicted (pred)\n",
+    "    label sequences both open the span with B-<en_type>, carry identical labels\n",
+    "    at every position inside it, and that neither sequence continues the entity\n",
+    "    with I-<en_type> at position e_idx.\n",
+    "    \"\"\"\n",
+    "    begin_tag = f\"B-{en_type}\"\n",
+    "    inside_tag = f\"I-{en_type}\"\n",
+    "    # both sequences have to start the entity on the very same token\n",
+    "    if not (gs[s_idx] == begin_tag and pred[s_idx] == begin_tag):\n",
+    "        return False\n",
+    "    # a single differing label anywhere in the span breaks the exact match\n",
+    "    if any(gs[pos] != pred[pos] for pos in range(s_idx, e_idx)):\n",
+    "        return False\n",
+    "    # the entity must end at e_idx on both sides, otherwise the boundaries differ\n",
+    "    span_continues = e_idx < len(gs) and inside_tag in (pred[e_idx], gs[e_idx])\n",
+    "    return not span_continues\n",
+    "\n",
+    "# relax matching function\n",
+    "def bio_relax_match(gs, pred, s_idx, e_idx, en_type):\n",
+    "    \"\"\"Return True when gold and prediction overlap on en_type inside the span.\n",
+    "\n",
+    "    The span [s_idx, e_idx) counts as a relaxed match as soon as one position\n",
+    "    carries en_type in both label sequences, whatever the B-/I- prefix is.\n",
+    "    Labels without a hyphen are treated as the outside type \"O\".\n",
+    "    \"\"\"\n",
+    "    def _category(label):\n",
+    "        # split on the first hyphen only, so an entity type that itself contains\n",
+    "        # a hyphen (e.g. \"Non-query\") is not truncated to its last fragment\n",
+    "        return label.split(\"-\", 1)[1] if \"-\" in label else \"O\"\n",
+    "\n",
+    "    return any(\n",
+    "        _category(gs[idx]) == _category(pred[idx]) == en_type\n",
+    "        for idx in range(s_idx, e_idx)\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# settings for evaluating the transformer-based models\n",
+    "dataset = 'chia' # alternative: 'frd'\n",
+    "outfolder = \"transformer\"\n",
+    "# one gold-standard file per entry; os.listdir returns them in arbitrary order\n",
+    "test_files = os.listdir(f\"{dataset}/tests/\")\n",
+    "models = [\n",
+    "    'bert', 'bert_mimic',\n",
+    "    'albert', 'albert_mimic',\n",
+    "    'roberta', 'roberta_mimic',\n",
+    "    'electra', 'electra_mimic',\n",
+    "] # 'distilbert' is left out"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = models[5]  # currently 'roberta_mimic'\n",
+    "predictions = {}\n",
+    "eval_metrics = {\"category\":{},\"overall\":{}, \"prediction\":{}}\n",
+    "\n",
+    "def _read_bio_sentences(path):\n",
+    "    \"\"\"Read a BIO-tagged file and return one list of token lines per sentence.\"\"\"\n",
+    "    with open(path, \"r\", encoding=\"utf-8\") as f:\n",
+    "        return [sent.split('\\n') for sent in f.read().strip().split('\\n\\n')]\n",
+    "\n",
+    "def _tally_spans(span_bio, anno, anno_bio, pred_bio, file_bucket, overall, cate_counts, suffix, count_gold=False):\n",
+    "    \"\"\"Score every entity span found in span_bio as strict / relax / miss.\n",
+    "\n",
+    "    span_bio is the label sequence whose entities are enumerated: the gold labels\n",
+    "    for the recall-side counts, the predicted labels for the precision-side counts.\n",
+    "    The token lines of each span are appended to file_bucket[level], and the counters\n",
+    "    overall[f\"{level}_{suffix}\"] and cate_counts[cate][level] are incremented.\n",
+    "    With count_gold=True the span is additionally counted in overall['gs'].\n",
+    "    \"\"\"\n",
+    "    llen = len(span_bio)\n",
+    "    cur_idx = 0\n",
+    "    while cur_idx < llen:\n",
+    "        label = span_bio[cur_idx].strip()\n",
+    "        if label == 'O':\n",
+    "            cur_idx += 1\n",
+    "            continue\n",
+    "        start_idx = cur_idx\n",
+    "        end_idx = start_idx + 1\n",
+    "        # split on the first hyphen only, so an entity type that itself contains\n",
+    "        # a hyphen does not break the two-value unpacking\n",
+    "        _, cate = label.split('-', 1)\n",
+    "        # extend the span over all directly following I-<cate> labels\n",
+    "        while end_idx < llen and span_bio[end_idx].strip() == f\"I-{cate}\":\n",
+    "            end_idx += 1\n",
+    "        # gold token line followed by the predicted label, kept for error analysis\n",
+    "        match_entity = [f\"{anno[idx]} {pred_bio[idx]}\" for idx in range(start_idx, end_idx)]\n",
+    "        if count_gold:\n",
+    "            gs_counts = overall.setdefault('gs', {})\n",
+    "            gs_counts['count'] = gs_counts.get('count', 0) + 1\n",
+    "            gs_counts[cate] = gs_counts.get(cate, 0) + 1\n",
+    "        if bio_strict_match(anno_bio, pred_bio, start_idx, end_idx, cate):\n",
+    "            level = 'strict'\n",
+    "        elif bio_relax_match(anno_bio, pred_bio, start_idx, end_idx, cate):\n",
+    "            level = 'relax'\n",
+    "        else:\n",
+    "            level = 'miss'\n",
+    "        file_bucket.setdefault(level, []).append(match_entity)\n",
+    "        overall[f\"{level}_{suffix}\"] = overall.get(f\"{level}_{suffix}\", 0) + 1\n",
+    "        cate_counts.setdefault(cate, {\"strict\":0, \"relax\":0, \"miss\":0})[level] += 1\n",
+    "        cur_idx = end_idx\n",
+    "\n",
+    "for file in test_files:\n",
+    "    file_id = file.split('.')[0]\n",
+    "    # load the annotation and prediction files\n",
+    "    test_anno = _read_bio_sentences(f\"{dataset}/tests/{file}\")\n",
+    "    test_pred = _read_bio_sentences(f\"{outfolder}/{dataset}_results/{dataset}_{model}_results/{file}\")\n",
+    "    assert len(test_anno) == len(test_pred), f\"sentence count differs between annotation and prediction for {file}\"\n",
+    "    # compare annotation label and prediction label sentence by sentence\n",
+    "    file_preds = {\"predicted\":{}, \"prediction\":{}}\n",
+    "    for anno, pred in zip(test_anno, test_pred):\n",
+    "        assert len(anno) == len(pred), f\"token count differs between annotation and prediction in {file}\"\n",
+    "        # the label is the last whitespace-separated field of every token line\n",
+    "        anno_bio = [i.split()[-1] for i in anno]\n",
+    "        pred_bio = [i.split()[-1] for i in pred]\n",
+    "        # token-level accuracy counts\n",
+    "        for gold_label, pred_label in zip(anno_bio, pred_bio):\n",
+    "            acc_key = \"acc_true\" if gold_label == pred_label else \"acc_false\"\n",
+    "            eval_metrics[\"overall\"][acc_key] = eval_metrics[\"overall\"].get(acc_key, 0) + 1\n",
+    "        # process gold standard: was each annotated entity found? (recall side)\n",
+    "        _tally_spans(anno_bio, anno, anno_bio, pred_bio, file_preds[\"predicted\"], eval_metrics[\"overall\"], eval_metrics[\"category\"], \"predicted\", count_gold=True)\n",
+    "        # process predictions: is each predicted entity correct? (precision side)\n",
+    "        _tally_spans(pred_bio, anno, anno_bio, pred_bio, file_preds[\"prediction\"], eval_metrics[\"overall\"], eval_metrics[\"prediction\"], \"predict\")\n",
+    "    predictions[file_id] = file_preds\n",
+    "# write through a context manager so the output file is flushed and closed deterministically\n",
+    "with open(f\"{outfolder}/{dataset}_results/{dataset}_{model}_prediction.json\", \"w\", encoding=\"utf-8\") as f:\n",
+    "    json.dump(eval_metrics, f, indent=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Overall Relax Level: Precision: 0.771102433163112, Recall: 0.8174917491749175, F1: 0.7936197726115226\n",
+      "Overall Strict Level: Precision: 0.6158005407029138, Recall: 0.6765676567656765, F1: 0.6447554646957069\n",
+      "\n",
+      "\n",
+      "Relax Level for Condition: Precision: 0.8694915254237288, Recall: 0.8998178506375227, F1: 0.884394788316812\n",
+      "Strict Level for Condition: Precision: 0.7186440677966102, Recall: 0.7723132969034608, F1: 0.7445127304653206\n",
+      "\n",
+      "\n",
+      "Relax Level for Observation: Precision: 0.546875, Recall: 0.38333333333333336, F1: 0.45072788353863386\n",
+      "Strict Level for Observation: Precision: 0.3671875, Recall: 0.2611111111111111, F1: 0.3051948051948052\n",
+      "\n",
+      "\n",
+      "Relax Level for Temporal: Precision: 0.6457142857142857, Recall: 0.8120300751879699, F1: 0.7193845972471926\n",
+      "Strict Level for Temporal: Precision: 0.4828571428571429, Recall: 0.6353383458646616, F1: 0.5487012987012988\n",
+      "\n",
+      "\n",
+      "Relax Level for Drug: Precision: 0.8448275862068966, Recall: 0.9228295819935691, F1: 0.8821075740944017\n",
+      "Strict Level for Drug: Precision: 0.7011494252873564, Recall: 0.7845659163987139, F1: 0.7405159332321699\n",
+      "\n",
+      "\n",
+      "Relax Level for Procedure: Precision: 0.6602209944751382, Recall: 0.721875, F1: 0.6896728335686\n",
+      "Strict Level for Procedure: Precision: 0.5082872928176796, Recall: 0.575, F1: 0.5395894428152493\n",
+      "\n",
+      "\n",
+      "Relax Level for Value: Precision: 0.8157894736842105, Recall: 0.8501529051987767, F1: 0.8326167817979807\n",
+      "Strict Level for Value: Precision: 0.672514619883041, Recall: 0.7033639143730887, F1: 0.6875934230194319\n",
+      "\n",
+      "\n",
+      "Relax Level for Measurement: Precision: 0.767515923566879, Recall: 0.8, F1: 0.7834213734254368\n",
+      "Strict Level for Measurement: Precision: 0.5414012738853503, Recall: 0.6071428571428571, F1: 0.5723905723905723\n",
+      "\n",
+      "\n",
+      "Relax Level for Person: Precision: 0.7639751552795031, Recall: 0.8785714285714286, F1: 0.8172757475083056\n",
+      "Strict Level for Person: Precision: 0.7329192546583851, Recall: 0.8428571428571429, F1: 0.7840531561461795\n",
+      "\n",
+      "\n",
+      "Relax Level for Mood: Precision: 0.36507936507936506, Recall: 0.4489795918367347, F1: 0.4027059291683247\n",
+      "Strict Level for Mood: Precision: 0.1746031746031746, Recall: 0.22448979591836735, F1: 0.19642857142857142\n",
+      "\n",
+      "\n",
+      "Relax Level for Device: Precision: 0.5892857142857143, Recall: 0.8048780487804879, F1: 0.6804123711340206\n",
+      "Strict Level for Device: Precision: 0.5178571428571429, Recall: 0.7073170731707317, F1: 0.5979381443298969\n",
+      "\n",
+      "\n",
+      "Relax Level for Pregnancy_considerations: Precision: 0.52, Recall: 0.3333333333333333, F1: 0.40625000000000006\n",
+      "Strict Level for Pregnancy_considerations: Precision: 0.0, Recall: 0.0, F1: 0.0\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Turn the raw match counts collected in eval_metrics into accuracy, precision,\n",
+    "# recall and F1 (overall and per entity type), print them and save them as JSON.\n",
+    "attbc_metrics = {\"category\":{\"relax\":{}, \"strict\":{}},\"overall\":{}}\n",
+    "attbc_metrics[\"overall\"][\"acc\"] = eval_metrics[\"overall\"][\"acc_true\"]/(eval_metrics[\"overall\"][\"acc_true\"]+eval_metrics[\"overall\"][\"acc_false\"])\n",
+    "# *_predict counters are tallied over predicted spans (precision side),\n",
+    "# *_predicted counters over gold-standard spans (recall side)\n",
+    "pred_all = eval_metrics[\"overall\"]['strict_predict'] + eval_metrics[\"overall\"]['relax_predict'] + eval_metrics[\"overall\"]['miss_predict']\n",
+    "pre_relax_all = (eval_metrics[\"overall\"]['strict_predict'] + eval_metrics[\"overall\"]['relax_predict'])/ pred_all\n",
+    "rec_relax_all = (eval_metrics[\"overall\"]['strict_predicted'] + eval_metrics[\"overall\"]['relax_predicted'])/ eval_metrics[\"overall\"]['gs']['count']\n",
+    "# F1 is undefined when precision and recall are both zero: report 0.0 instead of raising\n",
+    "f1_relax_all = (2*pre_relax_all*rec_relax_all)/(pre_relax_all+rec_relax_all) if (pre_relax_all+rec_relax_all) != 0 else 0.0\n",
+    "print(f\"Overall Relax Level: Precision: {pre_relax_all}, Recall: {rec_relax_all}, F1: {f1_relax_all}\")\n",
+    "attbc_metrics[\"overall\"][\"relax\"] = {\"f_score\": f1_relax_all, \"precision\":pre_relax_all, \"recall\":rec_relax_all}\n",
+    "\n",
+    "pre_strict_all = eval_metrics[\"overall\"]['strict_predict'] / pred_all\n",
+    "rec_strict_all = eval_metrics[\"overall\"]['strict_predicted'] / eval_metrics[\"overall\"]['gs']['count']\n",
+    "f1_strict_all = (2*pre_strict_all*rec_strict_all)/(pre_strict_all+rec_strict_all) if (pre_strict_all+rec_strict_all) != 0 else 0.0\n",
+    "print(f\"Overall Strict Level: Precision: {pre_strict_all}, Recall: {rec_strict_all}, F1: {f1_strict_all}\")\n",
+    "print('\\n')\n",
+    "attbc_metrics[\"overall\"][\"strict\"] = {\"f_score\": f1_strict_all, \"precision\":pre_strict_all, \"recall\":rec_strict_all}\n",
+    "\n",
+    "for i in eval_metrics[\"category\"].keys():\n",
+    "    # tt: number of gold-standard entities of type i (denominator for recall)\n",
+    "    tt = eval_metrics[\"overall\"]['gs'][i]\n",
+    "    # a type that occurs in the gold standard may never have been predicted: fall back to zero counts\n",
+    "    pred_counts = eval_metrics[\"prediction\"].get(i, {\"strict\":0, \"relax\":0, \"miss\":0})\n",
+    "    # tp: number of predicted entities of type i (denominator for precision)\n",
+    "    tp = pred_counts['strict'] + pred_counts['relax'] + pred_counts['miss']\n",
+    "\n",
+    "    pre_relax = (pred_counts['strict']+pred_counts['relax'])/tp if tp != 0 else 0.0\n",
+    "    rec_relax = (eval_metrics[\"category\"][i]['strict']+eval_metrics[\"category\"][i]['relax'])/tt\n",
+    "    f1_relax = (2*pre_relax*rec_relax)/(pre_relax+rec_relax) if (pre_relax+rec_relax) != 0 else 0.0\n",
+    "    print(f\"Relax Level for {i}: Precision: {pre_relax}, Recall: {rec_relax}, F1: {f1_relax}\")\n",
+    "    attbc_metrics[\"category\"][\"relax\"][i] = {\"f_score\": f1_relax, \"precision\":pre_relax, \"recall\":rec_relax}\n",
+    "\n",
+    "    pre_strict = pred_counts['strict']/tp if tp != 0 else 0.0\n",
+    "    rec_strict = eval_metrics[\"category\"][i]['strict']/tt\n",
+    "    f1_strict = (2*pre_strict*rec_strict)/(pre_strict+rec_strict) if (pre_strict+rec_strict) != 0 else 0.0\n",
+    "    print(f\"Strict Level for {i}: Precision: {pre_strict}, Recall: {rec_strict}, F1: {f1_strict}\")\n",
+    "    print('\\n')\n",
+    "    attbc_metrics[\"category\"][\"strict\"][i] = {\"f_score\": f1_strict, \"precision\":pre_strict, \"recall\":rec_strict}\n",
+    "# write through a context manager so the output file is flushed and closed deterministically\n",
+    "with open(f\"{outfolder}/{dataset}_results/{dataset}_{model}_eval_metric.json\", \"w\", encoding=\"utf-8\") as f:\n",
+    "    json.dump(attbc_metrics, f, indent=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# function for loading performance output file\n",
+    "def get_perform_metric(perform):\n",
+    "    \"\"\"Flatten a loaded performance dict into column lists for a DataFrame.\n",
+    "\n",
+    "    perform must provide perform['overall'][level] and perform['category'][level][name]\n",
+    "    for level in ('strict', 'relax'), each holding 'precision', 'recall' and 'f_score'.\n",
+    "    The first entry of every returned list is the overall score; the remaining\n",
+    "    entries follow the key order of perform['category']['strict'].\n",
+    "    \"\"\"\n",
+    "    overall_scores = perform['overall']\n",
+    "    category_scores = perform['category']\n",
+    "    # the strict section dictates which entity types are listed and in which order\n",
+    "    names = list(category_scores['strict'])\n",
+    "    columns = {'type': ['Overall'] + names}\n",
+    "    for level in ('strict', 'relax'):\n",
+    "        rows = [overall_scores[level]] + [category_scores[level][name] for name in names]\n",
+    "        for short, field in (('pre', 'precision'), ('rec', 'recall'), ('f1', 'f_score')):\n",
+    "            columns[f'{short}_{level}'] = [row[field] for row in rows]\n",
+    "    return columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "                        type  pre_strict  rec_strict  f1_strict  pre_relax  \\\n",
+      "0                    Overall    0.615801    0.676568   0.644755   0.771102   \n",
+      "1                  Condition    0.718644    0.772313   0.744513   0.869492   \n",
+      "10                    Device    0.517857    0.707317   0.597938   0.589286   \n",
+      "4                       Drug    0.701149    0.784566   0.740516   0.844828   \n",
+      "7                Measurement    0.541401    0.607143   0.572391   0.767516   \n",
+      "9                       Mood    0.174603    0.224490   0.196429   0.365079   \n",
+      "2                Observation    0.367188    0.261111   0.305195   0.546875   \n",
+      "8                     Person    0.732919    0.842857   0.784053   0.763975   \n",
+      "11  Pregnancy_considerations    0.000000    0.000000   0.000000   0.520000   \n",
+      "5                  Procedure    0.508287    0.575000   0.539589   0.660221   \n",
+      "3                   Temporal    0.482857    0.635338   0.548701   0.645714   \n",
+      "6                      Value    0.672515    0.703364   0.687593   0.815789   \n",
+      "\n",
+      "    rec_relax  f1_relax  \n",
+      "0    0.817492  0.793620  \n",
+      "1    0.899818  0.884395  \n",
+      "10   0.804878  0.680412  \n",
+      "4    0.922830  0.882108  \n",
+      "7    0.800000  0.783421  \n",
+      "9    0.448980  0.402706  \n",
+      "2    0.383333  0.450728  \n",
+      "8    0.878571  0.817276  \n",
+      "11   0.333333  0.406250  \n",
+      "5    0.721875  0.689673  \n",
+      "3    0.812030  0.719385  \n",
+      "6    0.850153  0.832617  \n"
+     ]
+    }
+   ],
+   "source": [
+    "# load performance output file (written as utf-8 JSON by the metrics cell)\n",
+    "with open(f\"{outfolder}/{dataset}_results/{dataset}_{model}_eval_metric.json\", \"r\", encoding=\"utf-8\") as f:\n",
+    "    perf_file = json.load(f)\n",
+    "perf_metrics = pd.DataFrame(data=get_perform_metric(perf_file))\n",
+    "# keep the \"Overall\" row on top and list the entity types alphabetically below it\n",
+    "df = pd.concat([perf_metrics.loc[[0]], perf_metrics[1:].sort_values(by=['type'])])\n",
+    "# print performance by entity type\n",
+    "print(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAGBCAYAAAB2PEr5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nO3dedxUZf3/8dcbXFBB3LBSVMgtl1wIVNLMJUsztUxLzUwrDZe09Zu2amk/K3MvydTUMrcWNRc0TVxKE1Ry1xBRCS3FBXcFPr8/rmtgGOa+Z7iZM3Pfx/fz8eDBPeecOdc1M2c+55prVURgZmZ9X79OZ8DMzFrDAd3MrCQc0M3MSsIB3cysJBzQzcxKwgHdzKwkHNDNzEribRPQJY2V9L1O56OWpPGSvljQuadK+lAR5367k7SupLslvSTp8E7nxwz6SECXtJWkf0h6UdJzkv4uaVTet7+kWxudIyLGRMSPmkirsAC7sCQdLektSS9LeiG/B6N7Qb5C0is5Xy9LeiFvX0LSH/KNJCRt0+T5hkr6o6Rn82d8r6T9i3wNLfB/wPiIGBQRp9buzNfR6/n9eVbSnyS9q5kTS9pG0pyq9/c/ko6pOab2M3hZ0v/lfV1eN5I+U3X8azXpvLywb4KkATkvT0rqV7V9SUnPS3q9atvtkvbNf++Yn/f7mvNtkbePqzn/0Kpj1s/v54z8+iZJOlySFjb/C/laj696Xyv/Dq96bZXP+xlJl0gaUue5L+V/D0k6RdLKrcxjrw/okpYFrgROA1YAVgWOAd5YiHP0LyZ3bXFxRAwEVgJuBC7tcH4qNo6IgfnfclXbbwX2BZ5eiHP9FngSWANYEdgP+G/LcgpIWqyV5yPl9f4GxxyWP7u1gIHACQtx/umV9xfYCviCpI/XHFP9GQyMiJ9W7at73UTEBVXn3ak6nbytp14Ftq96vBvwvwbPeQrYPn/HK/YDHunqCZLeA9wGPAxskK+9fYAPAEv2IN8L67ya97z6Zv7F/B6uC6wMHF/nuYNI1/iewDBgYnXgX1S9PqAD6wBExIURMTsiXouI6yLiHknrAWOB0TUlxXMlnSHpakmvANvmbcdWTippt3xnnynp0VxiOI50YZyez3d6vQxJulTS07k0ebOkDar2nSvpF5Kuynfif0pas2r/Dvnu/GI+f1OlioiYBVwArFpz5/9Yfh2VkthGXeR5M0m35eOeknS6pCXyvvfnUuRq+fHG+bj3NJO3qjy+GREnR8StwOyFeOoo4NyIeCUiZkXE3RFxTVXeK7/QXsglwf3z9sGSzs8losclfbdSSlT65fZ3SSdJeg44Om//vKQHc+nxWklrdJUpSbtKuj+nOz5fb0j6G7At866TdRq8Ly8AlwGbVJ17SUknS5qe/50sqW5AiojHgH8A6zd+Kxd4bt3rpjuSvi/psXz93idp5yae9ltSMK7YDzi/wXNeBa4hBTfy9fgJ4MJunnMscH1EHBURTwNExAMRsWdEvF7vCZIOzd/xGUol+3fk7ZXS/4F5//OSTmritXYrIp4DrqDq867Z/2ZE3AvsAbwCHLGoaVb0hYD+CDBb0nmSdpK0fGVHRDwIjAFuq1NS3Ac4DhhEKjXOJWkz0sX2TWA5YGtgakR8B7iFXLKKiMO6yNM1wNqku/BdpC9Mtb1JvyKWBybnfCBpJeCPwHdJJadHgS2beRPyxb4fMAN4Pm8bAZwDfIl01/8VcEUXgWE28NWc7mhSaeoQgIj4R37ueZKWIn05vxsRDzWTtxa4HfiFpL0krV69Iz++hvQLbQjpSzIp7z4NGAy8G/gg6f05oOrpmwNTSJ/TcbmE+21g93yuW+gieOQgfSHwlXzs1cBfJC0REdsx/3XSZYkyn2vFnObkqs3fAbbIr2djYDPSdVHv+WuTrpPbu0uni+cucN004WHg/aT39ifARfna7c4fgI9IGqhUjfA+0nvWyPn
MuxF8DLgDeLab4z+U02qKpI8C3yPdKFbN5/5dzWE7AZsCI4AD1GRVYTdpDgE+zvyf9wIi4i3gL6RCZGtERK//B6wHnAtMA2aR7n7vyPv2B26tOf5c4Pw6247Nf/8KOKmLtMaTfjo1m7flgAAGV6VzVtX+jwIP5b/3A26v2qf8muqmRypVvgm8QArIM4BtqvafAfyo5jkPAx/Mf08FPtTFub8C/Lnq8eLAncC9wDhA3bzmAGbmfL0AnFrnmGnVeW3wHi5P+nl6f36dk4BRed9R1fmsek5/UrXb+lXbvkSq165cF0/UPOca4AtVj/uRSolr1Dn/94BLao79T+U1NbpO8v5XgRfz+zUJWL1q/6PAR6sef4RUqADYBpiT39uZ+fl/Apbo5jN4AfhIM9dN1Tm2AaY18fk8VDl3nX0Dcl6GkgLl5/K1dRqwIfB61bG3A/vmv3ckBTzl63QY6VfMJ4HDgHF1zt8//93UdZWffwHww5rv6xzgnVXnHlm1/wrgK12c6/h8zVW/5ytUvbZXqj6vCcAqNc89q845vwLc2+zrafSvL5TQiYgHI2L/iBhKukhWAU5u8LQnu9m3GukLtdAk9Vdq4HhU0kzSxQip5FtRXX/8Kqn+FFK+5+Yr0ifaXT4hBZXlgHcA95FKPhVrAF/PVQIvKFU5rZbTqc33OpKuzFVFM4EfV+c5UmnhXNL7+/Oct+6MiIjl8r9F6uUREc9HxJERsUF+nZOAyySJrj+rlYAlgMertj1OKoVV1L63awCnVL1Xz5ECyqosaJXqc0fEnHy+esd25fCIGAxsRLppDa3aN9/589/Vn9v0/N4uSwpCrwHn1Zy/+jNYLiKurdrX3XXTLUlfkHRP1fu0Fvlaydd9pUFwVM1TK6XtZqpbgLnfgd+TAtsWpPayro6dTbpBNtW4nNV+jpWbZPXn2NX3tZ7f1rznz1Xt+1L+vEaQbhgLfA/rWJV0HbZEnwjo1SJVA5xLCjyQ7oZ1D+3mNE8Ca3axr1Eg24fU4PMh0k/SYXl7M3XhT5ECVHrCvIDVUEQ8SyqBHq15vSWeBI6rucCWjoh61QhnkEpaa+eL7tvVeZa0KvAD4DfAz7uqzy1afp0nkL4MK9D1Z/Us8BYpSFesTipFzz1dzXOeJH3pqt+vpSJVOdWaXn3uqs/qP3WO7Vak+tJjSdVKlfd8vvPnvE/v4vkvkoLeLj1Iu95106Vc1XQacBCp9Lkc80rSRMSaMa9BcELN068ntXktVWdfd84HDif9EmvU2eF6Uim+WbWf42BgWXrwOTYrIu4GfgrUbYOrystipGqmW1qVdq8P6JLeI+nryt2WcsPd3syrT/wvMLTSwNeks0l1ZdtL6idp1aoGwP+S6mS7Moj0s2sGsDSppNusq4ANJO2eP8zDSXfypuSb2bWkLnMAvwbGSNpcyTKSdpY0qIt8zwRezq/14MqOHGTOJb0vXyDdeBp28awnN/YNyA+XyA1P3d7sJP1E0oaSFst5PxiYHBEzSD+ZPyTpU3n/ipI2yaW1S0h144OUGje/xoL1o9XGAkcpN2IrNaru2cWxlwA752tkceDrpM+9XvBvxnmkuvxd8+MLge9KGpLrp7/fVd4lDQT2onGvmrrqXDfdGUiqkngG6CdpDKmE3kw6c0hVjLv3IH/bkNqdGvkesIOk46oaN9eVdHHVdVftQuDAfH0NILUJ/C1yg2qBzgLWlPSR2h2SFs/X4CWk7+UC3V57qtcHdOAlUuPWP5V6rNxO+gn59bz/b6QL/WlJ3TWmzBURd5Aaz04i/YS7iXl38VOAPXKLd703+nzST7j/AA+wEA1VubS0J6k+bQapYfXvzT4/+xlwkKSVI2IicCCpJPA8qSS1fxfP+wbp18VLpBvBxVX7Dif9NP9e/gl8AOmG15PGmodJ1QOrkoLIa8xfEq1naeDPpDrJKfn4XQEi4glSkPg66afpJFIjIsCXSfWWU0gN378nNRLXFRF/Zl4j30zSdbRTF8c+TOp
+eRrp18AuwC4R8WaD19JV2m+SvriVwW3HAhOBe0jtFnflbRWraF7f8MdJv1Y+U3Paf2n+PtHdVUPOvW4a5PMu0o1vIunGPjz/3ZSIuDdSZ4WFEhE3NxNk87nfT+rx82CuErqIVMpdoHQfEVcC/49UNz6dVID67MLmb2FFxGuk72X1YMbPSXqJdJ3/mRRDRkVEo+6dTVPjqlIzM+sL+kIJ3czMmuCAbm2hNEDn5Tr/aqsRzKyHXOViZlYSrZ7fomkrrbRSDBs2rFPJm5n1SXfeeeezEVF3GoeOBfRhw4YxcWLTjedmZgZIeryrfa5DNzMrCQd0M7OScEA3MyuJjtWhm9nbz1tvvcW0adN4/fW6U5dblQEDBjB06FAWX3zxpp/jgG5mbTNt2jQGDRrEsGHDaDDFz9taRDBjxgymTZvG8OHDm36eq1zMrG1ef/11VlxxRQfzBiSx4oorLvQvGQd0M2srB/Pm9OR9ckA3MysJ16GbWccMO/Kqlp5v6vHNrGddXg7ojRw9uJt9L7YvH0Up++szq3HqqadyxhlnsP766zN9+nTuuusujjvuOL7xjW8s9LlOPvlkDjroIJZeeum6+7/4xS/yta99jfXXX7/u/vHjx7PEEkvw/ve/f6HTrscB3czeVn75y19yzTXXsMwyy/D4449z2WWX9fhcJ598Mvvuu2/dgD579mzOOuusbp8/fvx4Bg4c2LKA7jp0M3vbGDNmDFOmTGHXXXflggsuYNSoUU31837llVfYeeed2Xjjjdlwww25+OKLOfXUU5k+fTrbbrst2267LQADBw7k+9//Pptvvjm33XYb22yzzdw5q8aNG8eIESPYeOON2X777Zk6dSpjx47lpJNOYpNNNuGWWxZ9aVGX0M3sbWPs2LGMGzeOG2+8kZVWWqnp540bN45VVlmFq65Kdf4vvvgigwcP5sQTT5zvXK+88gobbrghP/zhD+d7/jPPPMOBBx7IzTffzPDhw3nuuedYYYUVGDNmDAMHDuxRdU89LqGbmTXw3ve+l+uvv55vfetb3HLLLQweXL/tqX///nzyk59cYPvtt9/O1ltvPXeQ0AorrFBIPh3QzcwaWGeddbjzzjt573vfy1FHHbVACbxiwIAB9O/ff4HtEdGW/veucjGzjukr3QynT5/OCiuswL777svAgQM599xzARg0aBAvvfRSw+qb0aNHc+ihh/LYY4/NV+UyaNAgZs6c2bJ8OqCb2dvS008/zciRI5k5cyb9+vXj5JNP5oEHHmDZZZdd4Nh7772Xb37zm/Tr14/FF1+cM844A4CDDjqInXbaiXe9613ceOONXaY1ZMgQzjzzTHbffXfmzJnDyiuvzF//+ld22WUX9thjDy6//HJOO+00PvCBDyzSa+rYmqIjR46MPrFiUdn7aZf99Vmv8uCDD7Leeut1Oht9Rr33S9KdETGy3vGuQzczKwlXuZiZZTNmzGD77bdfYPsNN9zAiiuu2IEcLRwHdDOzbMUVV2TSpEmdzkaPOaCbmXXC9Lu73rfKpj06pevQzcxKwgHdzKwkXOVi1leVoctpd6+hR+frI6+7IE2V0CXtKOlhSZMlHVln/2BJf5H0L0n3Szqg9Vk1M1t0p556Kuuttx6f/OQnGT16NEsuuSQnnHBCy85fPcNiuzUsoUvqD/wC2AGYBkyQdEVEPFB12KHAAxGxi6QhwMOSLoiINwvJtZlZDy3qfOgRQUTQr1/vq7FuJkebAZMjYkoO0BcBu9UcE8AgpdlnBgLPAbNamlMzs0XU0/nQp06dynrrrcchhxzCiBEjePLJJ7nuuusYPXo0I0aMYM899+Tll19e4HkHH3wwI0eOZIMNNuAHP/gBkKbeXXfddXl48lQA9j7kKH59wZ9a8vqaCeirAk9WPZ6Wt1U7HVgPmA7cCxwREXNqTyTpIEkTJU185plnephlM7OeGTt2LKussgo33ngjX/3qVxfquQ8//DD77bcfd999N8ssswzHHnss119
/PXfddRcjR47kxBNPXOA5xx13HBMnTuSee+7hpptu4p577mHw4MGcfvrp7P/VH3DR5dfy/IszOfAzu7fk9TXTKFpvzsfaCWA+AkwCtgPWBP4q6ZaImG8asYg4EzgT0lwuC59dM7POWGONNdhiiy2ANL/5Aw88wJZbbgnAm2++yejRoxd4ziWXXMKZZ57JrFmzeOqpp3jggQfYaKON2GGHHbj0vLU49NvH86+/XtSyPDYT0KcBq1U9HkoqiVc7ADg+0kxfkyU9BrwHuKMluaxWhpZ9M+tzlllmmbl/RwQ77LADF154YZfHP/bYY5xwwglMmDCB5Zdfnv3335/XX38dgDlz5vDgvx9jqQFL8twLLzJ0lXe0JI/NBPQJwNqShgP/AfYC9qk55glge+AWSe8A1gWmtCSHZlZefbQQtsUWW3DooYcyefJk1lprLV599VWmTZvGOuusM/eYmTNnsswyyzB48GD++9//cs0117DNNtsAcNJJJ7He2sP58ZGH8fmvH8NtV5zbVF1+Iw0DekTMknQYcC3QHzgnIu6XNCbvHwv8CDhX0r2kKppvRcSzi5w7Kx//wloow468qst9Uwe0MSMltDDzodcaMmQI5557LnvvvTdvvPEGAMcee+x8AX3jjTdm0003ZYMNNuDd73733OqZRx55hLPOOos7Lv81gwYuw9abj+DYU87imG8cvMivqamBRRFxNXB1zbaxVX9PBz68yLkxMyvY1KlT5/49bdq0pp4zbNgw7rvvvvm2bbfddkyYMGGBY8ePHz/378rKRrUefPDBuXO5nHj015vKQzN6X0dKMzPrEQ/9NzPLPB+6mdlCiAjSGMTepzfNh96T5UFd5WJmbTNgwABmzJjRo2D1dhIRzJgxgwEDFq7l2yV0M2uboUOHMm3aNDxSHHjhf13ve/EhBgwYwNChQxfqlA7oZtY2iy++OMOHD+90NnqHo7foZl/PuvC6ysXMrCRcQseDN8ysHBzQ3wZ8wzJ7e3CVi5lZSTigm5mVhAO6mVlJOKCbmZWEG0XNzKAUUzu7hG5mVhIO6GZmJeGAbmZWEg7oZmYl4YBuZlYSDuhmZiXhbotm1juVoBthuzmgm5kVpN0T4zmg9zYulZhZD7kO3cysJBzQzcxKwgHdzKwkHNDNzErCAd3MrCQc0M3MSsIB3cysJBzQzcxKolcOLGr36CozszJwCd3MrCQc0M3MSsIB3cysJBzQzcxKwgHdzKwkHNDNzEqiqYAuaUdJD0uaLOnILo7ZRtIkSfdLuqm12TQzs0Ya9kOX1B/4BbADMA2YIOmKiHig6pjlgF8CO0bEE5JWLirDZmZWXzMl9M2AyRExJSLeBC4Cdqs5Zh/gTxHxBEBE/K+12TQzs0aaCeirAk9WPZ6Wt1VbB1he0nhJd0rar96JJB0kaaKkic8880zPcmxmZnU1M/RfdbZFnfO8D9geWAq4TdLtEfHIfE+KOBM4E2DkyJG153jb8NQGZlaEZgL6NGC1qsdDgel1jnk2Il4BXpF0M7Ax8AhmZtYWzVS5TADWljRc0hLAXsAVNcdcDnxA0mKSlgY2Bx5sbVbNzKw7DUvoETFL0mHAtUB/4JyIuF/SmLx/bEQ8KGkccA8wBzgrIu4rMuNmZja/pqbPjYirgatrto2tefwz4Gety5qZmS0MjxQ1MyuJXrnAhfVt7sVj1hkuoZuZlYQDuplZSTigm5mVhAO6mVlJuFHUzN42yt5g7xK6mVlJuIRuZh1T9hJzu7mEbmZWEg7oZmYl4YBuZlYSDuhmZiXhgG5mVhIO6GZmJeGAbmZWEg7oZmYl4YBuZlYSDuhmZiXhgG5mVhIO6GZmJeGAbmZWEg7oZmYl4YBuZlYSDuhmZiXhgG5mVhIO6GZmJeGAbmZWEg7oZmYl4UWizVrp6MHd7HuxffmwtyWX0M3MSsIB3cysJBzQzcxKwgHdzKwkHNDNzErCAd3MrCQc0M3MSsIB3cysJJoK6JJ2lPSwpMmSjuz
muFGSZkvao3VZNDOzZjQM6JL6A78AdgLWB/aWtH4Xx/0EuLbVmTQzs8aaGfq/GTA5IqYASLoI2A14oOa4LwN/BEa1NIdm1jt4WoNer5mAvirwZNXjacDm1QdIWhX4BLAd3QR0SQcBBwGsvvrqC5tXs4XnIGRvI83UoavOtqh5fDLwrYiY3d2JIuLMiBgZESOHDBnSbB7NzKwJzZTQpwGrVT0eCkyvOWYkcJEkgJWAj0qaFRGXtSSXZmbWUDMBfQKwtqThwH+AvYB9qg+IiOGVvyWdC1zpYG5m1l4NA3pEzJJ0GKn3Sn/gnIi4X9KYvH9swXk0M7MmNLXARURcDVxds61uII+I/Rc9W2ZmtrA8UtTMrCQc0M3MSsIB3cysJBzQzcxKwgHdzKwkHNDNzErCAd3MrCQc0M3MSsIB3cysJBzQzcxKwgHdzKwkHNDNzErCAd3MrCQc0M3MSsIB3cysJBzQzcxKoqkFLsxsnmFHXtXlvqkD2pgRsxouoZuZlYQDuplZSTigm5mVhAO6mVlJOKCbmZWEA7qZWUk4oJuZlYQDuplZSTigm5mVhAO6mVlJOKCbmZWEA7qZWUk4oJuZlYQDuplZSTigm5mVhAO6mVlJOKCbmZWEA7qZWUk4oJuZlYQDuplZSTigm5mVRFMBXdKOkh6WNFnSkXX2f0bSPfnfPyRt3PqsmplZdxoGdEn9gV8AOwHrA3tLWr/msMeAD0bERsCPgDNbnVEzM+teMyX0zYDJETElIt4ELgJ2qz4gIv4REc/nh7cDQ1ubTTMza6SZgL4q8GTV42l5W1e+AFxTb4ekgyRNlDTxmWeeaT6XZmbW0GJNHKM626LugdK2pIC+Vb39EXEmuTpm5MiRdc9htrCGHXlVl/umDmhjRsw6rJmAPg1YrerxUGB67UGSNgLOAnaKiBmtyZ6ZmTWrmSqXCcDakoZLWgLYC7ii+gBJqwN/Aj4bEY+0PptmZtZIwxJ6RMySdBhwLdAfOCci7pc0Ju8fC3wfWBH4pSSAWRExsrhsm5lZrWaqXIiIq4Gra7aNrfr7i8AXW5s1MzNbGB4pamZWEg7oZmYl4YBuZlYSDuhmZiXhgG5mVhIO6GZmJeGAbmZWEg7oZmYl4YBuZlYSDuhmZiXhgG5mVhIO6GZmJeGAbmZWEg7oZmYl4YBuZlYSDuhmZiXhgG5mVhIO6GZmJeGAbmZWEg7oZmYl4YBuZlYSDuhmZiXhgG5mVhIO6GZmJeGAbmZWEg7oZmYl4YBuZlYSDuhmZiXhgG5mVhIO6GZmJeGAbmZWEg7oZmYl4YBuZlYSDuhmZiXhgG5mVhIO6GZmJeGAbmZWEg7oZmYl0VRAl7SjpIclTZZ0ZJ39knRq3n+PpBGtz6qZmXWnYUCX1B/4BbATsD6wt6T1aw7bCVg7/zsIOKPF+TQzswaaKaFvBkyOiCkR8SZwEbBbzTG7AedHcjuwnKR3tTivZmbWDUVE9wdIewA7RsQX8+PPAptHxGFVx1wJHB8Rt+bHNwDfioiJNec6iFSCB1gXeLgHeV4JeLYHz+spp+f0emt6ZX5tTq9ra0TEkHo7FmviyaqzrfYu0MwxRMSZwJlNpNl1ZqSJETFyUc7h9JxeGdIr82tzej3TTJXLNGC1qsdDgek9OMbMzArUTECfAKwtabikJYC9gCtqjrkC2C/3dtkCeDEinmpxXs3MrBsNq1wiYpakw4Brgf7AORFxv6Qxef9Y4Grgo8Bk4FXggOKyvGhVNk7P6ZUovTK/NqfXAw0bRc3MrG/wSFEzs5JwQDczKwkHdDOzkmimH7rZIpF0RESc0mibGYCkpYDVI6InAw+bOf9G3e2PiHuKSLcdenWjqKR7qTNAiTSQKSKi2w9mEdJdB/gmsAZVN72I2K6g9JYGvk66iA+UtDawbkRcWVB6f2HB9/VFYCLwq4h4vcXp3RURI2q23R0Rm7Y4nRW62x8Rz7UyvZxmW6+VTpC0JjAtIt6QtA2wEWmqjxcKSGs
X4ARgiYgYLmkT4IcRsWsL07ilm90REVu3Kq2adN8B/BhYJSJ2ynNijY6Is1uWRi8P6Gt0tz8iHi8o3X8BY4E7gdlV6d1ZUHoX57T2i4gNcwnltojYpKD0TgGGABfmTZ8GngaWApaNiM+2KJ29gX2ArYDqL9EgYHZEfKgV6VSl9xjpRiVgdeD5/PdywBMRMbyV6eU0232t7A78BFiZ9NoqhZtli0gvpzkJGAkMI3VfvoJU4PhoAWndCWwHjK/c8CXdU1ThrZ0kXQP8BvhORGwsaTHg7oh4b6vS6NVVLkUF7CbMioh2zhi5ZkR8OgdAIuI1SfWmU2iVTWtKIX+RdHNEbC3p/ham8w/gKdKcFT+v2v4S0PKftZWALWkscEVEXJ0f7wS09OZRpd3Xyk+BXSLiwTamOSePR/kEcHJEnCbp7oLSmhURLxZ7+c8j6T2kWWQHVLZFxO8LSm6liLhE0lE5nVmSZjd60sLo1QFd0kt0X+VSVKnkL5IOAf4MvFHZWMRP9uzNXCoPmPsT943un7JIhkhaPSKeyOmtTgq6AG+2KpF8Q34cGN2qczZpVESMqcrHNZJ+VFBa7b5W/tvmYA7wVi5sfA7YJW9bvKC07pO0D9A/Vz0eTioYtJyk7wIfBt5D+uXxEeBWoKiA/oqkFZn3Pd+CVNXZMr26yqVT8k/3WhER7y4ovR2A75JKCtcBWwL7R8T4gtL7KKma4FHSzXE4cAgwHjgwIk5ucXptrSaQdC2piud3pC/PvsDWEfGRAtJq97VyCvBO4DLmv4H8qYj0cprrA2NI1YAXShoOfDoiji8graWB75ACLaRAe2yr23VyWvcCmwB35SqQd5HakFpWX1+T3gjgNGBD4D5StecerWyE7VMBXdLKzP/T6IkOZqel8p17C1Kwuz0iCp3GU9KSpJKJgIeK+MJUpTWZNlYT5MbRHwCVaqWbgWMKLDW3jaTf1NkcEfH5tmemxfJiOsdHxDfblN4dEbFZrrffBngZuDciNiwwzcVIU4cLeDgi3mrl+Xt1lUuFpF1JdbCrAP8j9Sh4ENigoPQWBw5mXkAYT7pzt/TNr0rvE8DfIuKq/Hg5SR+PiMsKSm+/mk0bSSIizi8iPdpcTZAD9xGSliXV/75cVFrtvhYVzREAABlPSURBVFYiosh5kuqStCVwNPN68lR+YbX0V0hEzJb0vlaes4G7JS0HnEPq4TUTuKuoxOp870a0+nvXJ0rouSfBdsD1EbGppG2BvSPioAZP7Wl6Z5HqCM/Lmz5L6pXxxYLSm1Tbo6WIbn1V5z6t6uEAYHvSz849CkqvrdUEkt4LnA9UujE+C3wuIu4rIK12XytDST/btyRVJ90KHBER04pIL6f5EPBVFuzJM6OAtH5OWsryUuCVqrRaeq3kTgfvrMwKK2ktUg+vIgN64d+7PlFCB96KiBmS+knqFxE3SvpJgemNioiNqx7/Ld9UilJvxG5hn01EfLn6saTBwG+LSg9YljQL54ertgVQVL3vr4CvRcSNALnv9JnA+wtIq93Xym9IjXZ75sf75m07FJjmixFxTYHnr7YCMINUgKto+bUSEaG00tr78uPJrTx/F2kW/r3rKwH9BUkDSXWhF0j6HzCrwPRmS1ozIh4FkPRuqkomBZgo6UTSYtwBfJlUGmqXV0mlokJ0oJpgmUowz+mPl7RMQWm1+1oZEhHV9ejnSvpKgekB3CjpZ6SgWv0Lq+Wl2TZfK3dIGlFkqbyBln/v+kpA3w14jfSz7zPAYOCHBab3TdJFPIVUX7gGxc7x/mXge8DFOb3rgEOLSqxmpGg/Uu+aSwpMbx3gDOAdeeDURsCuEXFsQUlOkfQ95pV+9gXq9UZphXZfK89K2pd5g8L2JpVoi7R5/r96ubRg/lJ0S+RG33rLVxbR6LsVcKCkR0nVO5W2gRHdP61n2vG96/V16Lnl+9pWjypsIt0lmdca/VBEFNkvvK0kfbDq4Szg8YLrYG8iBb5fVY3+u6+o3gSSlge
OIX1hRfpld3REPF9Qem27VvKYgdOZ17f/76Q69E4NwmspSZ+sejgA+AQwPSIOLyCtNettr/zaKiC9wr93vT6gA0i6AvhsRLS0E36ddLaLiL/lftMLKKBh5uSI+Irqz61CEf1hO3GDlDQhIkZVN/TWawguIN3Cerm0+1rppFzXW90N9CbS/CqFfh9z2v1InSGKmkdpQ9KNH+CWiGjlSOm26ytVLq8D90r6K/O3fLf6rv1B4G/MGw1XrYhGvEqVwAktPm+XctewVyUNbscXMns2l4YqI+T2IE0JUIjaXi6Siujl0u5rBQBJPwWOJVVBjgM2Br4SEb8rIr3sHNJAmE/lx58lNcTWvZm12NqkeXlaTmlpzUNIva8ALpH0i4j4ZYvTaduI975SQv9cve0RcV697S1Ib3hEPNZoWwvT+wRwdbuqdSRdQhrEVPQNspLeu5nXy+R5Un32vhExtaD0/kGaAKm6l8uPI6LlvVw6cK1MiohN8jXzcVK70o01PW0KSbPRthalVQl+yv8/DRwVEX8sIK17gPdXfsHljhf/iD48EVifKKFHxHkqeI7kGn8EahtG/kDu4lSAXYGTJd0MXESqEimyF89V+V9bRMQU4EO5p0m/iHip4CTb2cul3ddKZQ6VjwIXRsRzKn4iq9ckbRURt8LcgUavFZFQRAwq4rxdEFA9AOytvK3YRAsc8d4nArqq5kgGhquAOZJzOu8hjT4dXFM3uixVH0CrRcQBecThTqTpZn8p6a9FDU7JN8gh+e9nikijWh6Ntx9p+tXFKgGoqF8EtKGXS6euFeCKPNDnNeCQ/DkWNm1DdjBwXq5LF/AcsH8rE1Ca56RLBXUt/C1wu6Q/kl7Xx5k3QKzl1IYR732lyqXeHMn3RgvnEc7n3I30oe5KmvO54iXgoogoZNa3qvQXB3YkdXv7QEQMafH5RWrcOox0AfcjtbafFhGFdQPNVSC3A/cCcyrbC6wyK7yXSyeuldxAuAUpCMzM7SHLAIMi4ulWp1cn/WUBImJmAeeu/KIaQOoe+S/SZ7cR8M+I2Kqr5y5iuqOAD+SHt0TEhCLSyWkVPuK9T5TQqT9HcsvvRBFxOXC5pNERcVurz98VSTsCewHbkuYCOYt5DVCt9BXSkPFRlTreXL99hqSvRsRJBaQJMCAivlbQuReQA3dRpf9KGm2/ViJijqSfR8Toqm2vUNUO0kqS9o2I30n6Ws32StontiqtiNg2n/si4KCIuDc/3hD4RqvSqeON/G8OxU5ZDW0Y8d5XAnpb5kiW9H8R8VNgH+XFJqoVWEWwP6nu/EsFN4zuB+wQVTM5RsSUPFDlOqCogP5bSQcCV1LgnOG5e2uXiugGSprg6VDSz+bqetGiZj+8LvfV/lMU//O60u5Qr167qLTfUwnmABFxX65ibTlJ3yFVcf6Z9Gvg95IuiIj/V0R6zBvxfgsFjXjvK1UubZkjWdIuEfGXdveqyWmvAawdEdfnBuDFWt14qG4G83S3rwXpHgocB7zAvEAQ0eLZ+iQ9AzxJGkX5T2oauCLiplaml9O8FHiIFBh+SBrJ/GBEHNHqtHJ6L5EC7WxSPXo7lqDbMiL+3mhbi9K6kPSLo3ou+4ERsUABqwVpPQi8LyJezY+XBu6MiPVanM7ppGtyEukz68e8Ee8XRAsnOesrAX3TiChqyauOy6XXg4AVImLN/CtkbERs3+J0FlisuZl9LUj3UWDzKH6O9/6kSar2JtW9XkXqCVLYYBHlwVLK617mdpBrixoI0wn1ro2irhdJA5h/OuKbgTNaXXjLaY0DPlVpE8htBBdGxM4tTucIUpXqu0jTe1wYEZNamUZFX6lyOVFpNZFLSQ1OhXxB1cWIzYqCfrJDmrdlM1Kpkoj4d+7a1GobS6rXoCWK7ZlxP2kiokJFxGzSYJtxSsPx9wbGS/phRJzW/bN7rNLt7YVc3/s0qTdPIXLD9meA4RHxI0mrAe+KiDsKSGs0aezAkJp69GWB/q1ODyAiXld
aE/bqNnRRfhW4X2mFqyDVANyqNFEerWr3iYhTgFPyr/C9gN/kG9fvgYsj4pFWpAN9JKBHxLaS3klqKDwz30kvjtZP7lQZsbk7af7uyui7vYGpLU6r2hsR8WalsUlpVZMiGn0L+RI2YTYwKfdkqK5DL2J+jiWBnUmf2TDgVIqbphfS9bg8aXK1K4CB+e+i/JLUgLcd8CPSKju/AEYVkNYSpNezGPPXo88Eipo7f1fgZxTcRTmrHY9xewFpzBVpvp2fAD+RtClpBO7RtPDm2CeqXKopDev+P9KahksUlMbNEbF1o20tTO+npPrl/UgzLx4CPBAR3ykivXZrV5uEpPNI6zVeQ/ol1/IFLeqk2T//MmiLSlWH5p8X519R7EjRNaJNk3910UX5nujDozcrqrol70Va3OImUvVLy1Ym6xMldEnrAZ8mlQpmkOqhvl5gkkMkvTvSCEeUFsVtaZ/wGkcCXyD10/4ScDWp62IpRPtG+n6W1KC2DnB4VTfXIhsOH8t1sReTlhEsuoT0Vm4rqMyLM4Sqvv0FeVVpPvTanjxFtBPU66JciNxd+EcsuLTeCt0+ceHTqbTr7AzcQerRdlDuctpSfSKgA+eSurwdDEwoooGkxldJda9T8uNhpEBbiNy/+DLgsmjDyM12U5tG+kZEvZWfirYuaYKuQ4FzcjvMRZGHyRfgVFI3u5UlHUcq5Hy3oLQqLiDdsD4GjAE+BxR1nbali3J2Oqkad74BbwX4Nqm+/But7qpbq1dXueS65B8DnweeIN1Bh5JmevtOFLQQb057SeA9+WEhc1znBq7qkZsi1TcXOnKz3br4Gd3ykb6dluvSTwE+U2R7hdK0A9uTrpcbouAFuCXdGRHvq676kHRTRHyw0XN7kFZbuijntMYD20VE0b9w2qa3l9B/RmqMGV7pk50bRE/I/wrp65u9jzz3CKl3SEtX5846NXKz3doy0rdTlBYu+DRpLp4JFDDKN/eKGAOsRSpR/iqKncCtWqXg9JSknYHppIJVy+U+4d/J/4r2f8BfcmCvbqw/tQ1pF6K3l9D/DaxTWy+Z6xAfiohC1sGU9FtgTdJAgEqDV7S6V4aku6kZuZm3DwGuq5Rm+zpJZwM3kNoKPkn6Gb14RIzpaMZaQNJjpOvkEuCKIupFczoXkwLrLaQbx9SIKHot0UraH8vprgacRuq2eExEdDsyt4dp/RXYMyJeyI+XJ1VhfaSAtK4hvae1cwwV2UupUL29hB71GpkiTUpU5J1oJLB+Gxq4Fq832CYinskt4mXxZVKJ6w1SXeK1pEUa+rRcsPhNm6rH1q9UUeUbZMv7ndeTX+PaEXEl8CJpvqEirVQJ5pDm5SloTAbAyhFR1DTHHdGJRqSF8YCk/Wo3Ks098lCB6d5H6odetDd7uK/PyAHhmIj4TkSMyv++24aG7cLl7opFB7iKue1FbaxqqbzGogbU1TNHad1UYO6UGEUVrG6QVJoRvdD7q1xWJQ0KeQ24k/TBjgKWAj4REf8pKN0bgU1IpaDqurVWz78+m/oz5Yk0Q2EpSumS/lamofDVck+TwaReINWrP7V0/u6aa0Wk78CrtGcul7a8xpzWjqTVrSrz7mxN6uJ3bQFpPU96Xa+SClCFdFtsp14d0CvyXXQD0ht+f0TcUHB6dVvvo4DJnd4OJP2ctDbkpcwfEPr8QsqaN493tSjTDazdr1HSSqR53wFur1ct2aJ06vZEaudAsVbrEwG9EyS9g3nDqe+IiP91Mj99maTf1NkcUdwUs9aH5eH/lVHZ43P9fVFp7QW8OyJ+LGko8I6IuLOo9IrmgF6HpE+RukyOJ/0q+ADwzYj4QyfzZb1PvvH/GFglInaStD4wOiLO7nDWWqadr1HS8aSC1AV5097AxIg4qoC0Tiet0bp1RKwnaQXSTJlFzIvTFg7odSgtFbVDpVSeuxFeX+R8GWWWS+j1eiv1+RJ67vpWGei2cR4Md3eZBk218zVKugfYpDLYJ1eL3F3EXC6
dmBenaL29l0un9KupYpmB36tFcSXzZra7gdSP+eWO5qh1VoqIS8j9mHMPlD5bB9uFdr/G5ar+HlxgOm8prdNamRdnRYqfF6dQvb0feqeMU5oj+cL8+NOkCbOsByLij9WPlValub5D2Wm1V3IgqASFLUj9tcukna/x/5GW9buRVN25NdDS6hZJi+Wb0i+AP5Im4zuGNML3mFam1W6ucqkiaS1So8jfJe3OvFXjnyctFfVoRzNYEpLWBa6KiLU6nZdFJWkEafTkhqTxC0OAPSLino5mrIXa9Rrz3EZDSetsjiJ99/4ZEU+3OJ25qy1J2gD4UE7r+mjDlMtFckCvIulK4Nu1F6qkkcAPImKXzuSsb1NaB7P6QnsaOKq25N5X5TrldUlB4eEocNK4TmnXa6xMBFbEuavSmFtnXjaucpnfsHqljoiYKGlY+7NTDhFRb9X4UpC0JzAuIu6X9F1ghKRjixh00275V2o96+TJ6ooYR3C7pFERMaGAc1fULqk3n4g4scC0C+WAPr/u1tVcqm25KBlJWwKTIuKVPG3DCOCUaNMqOAX7XkRcKmkr4COkWUDPADbvbLZaovKLdGXS2qJ/y4+3JXXpLSKgbwuMkTSVNAitMnqzlb1c+pOW1it+FY02c0Cf3wRJB0bEr6s3SvoCaeoB65kzSFMQb0yasvRs4Hyg5fNpd0Clt8fOpNXpL5d0dAfz0zIRcQDMrYpcPyKeyo/fRWpQLMJOBZ232lNtmlCt7RzQ5/cV4M+SPsO8AD6StNLOJzqWq75vVkSEpN1IJfOz1cU6o33QfyT9itSw9hOlhVHK1sV1WCWYZ/8lLfPXMnXmez+7wEnISlcyr3CjaB2StiW16EOaO+Zv3R1v3ZN0EzAOOIDUDe0ZUhVMnx98o7TCzo7AvRHx71x6fW9EXNfhrLVMHlG5Nqkbb5AWOZ4cEV9uYRq1870/HhGFLGAjaYUoeCm4TnFAt8JJeiewD2k92Fvy9KjbROtXgOqI3K1vK1Kw+3sZGkRr5QbSD+SHN0fEn1t8/rlLEuYeNXdUuhZa8xzQzRaBpO8DezKvgfDjwKUR0ecX8Gin6r7h9R5bcxzQrXB5ZOFpwHqk9oj+wMsRUeSw7raQ9CCwaWXBDklLAXdFxHqdzdmik3RrRGxVZxxBy+dg7+R872XiRlFrh9NJ9a6XkhqZ9yPVyZbBVFJ318oKTEsCpRhRHBFb5f8LH0cQEXXnJreF44BubRERkyX1z4sH/EbSPzqdp0Uh6TRSqfUN4H6lxY0h9Xa5tWMZK4CkNYFpEfGGpG2AjYDzo2rtT+sdHNCtHV6VtAQwSdJPgaeAZTqcp0U1Mf//AGkGyTmkPun1Vvfp6/4IjMxzHZ0NXEFa7PujHc2VLcB16Fa4vNDvf0n1518lTYn6y4iY3NGMLQJJiwPHAZ8HHif1PV+NNG/4t8s0n0vVvOHfBF6PiNPKPB9KX+YSuhUuIh7PjYXviog+PT1plZ+Sho8Pj4iXACQtSxr6/zPSILWyeEvS3sDnmDcdQCkWMC+bso1os15I0i7AJNLgIiRtIumKzuZqkX2MtBr9S5UNETETOJg0DUCZHACMBo6LiMckDQd+1+E8WR2ucrHCSboT2I604G9lqa97ilhWrF0kPRIRdYe/d7fPrEguoVs7zIqIsq3i84Ck/Wo35tkkH+pAfgojaUtJf5X0iKQpkh6TNKXT+bIFuQ7d2uE+SfsA/SWtDRwO9Olui8ChwJ8kfZ40kVuQVtlZivJN5HY2qTH7Tsq3XmqpuMrFCpcnsPoO8GHSyL9rgR9VRlf2ZZK2AzYgva77I+KGDmep5ST9MyLKML976Tmgm1m3JB1Pmq7hT6SBVACUcRKyvs4B3QrTqCdLROzarrxYz0mqN1gqImK7tmfGuuWAboWR9AzwJGke7X9Ss7BARNzUiXyZlZUDuhVGUn9gB2Bv0vwfVwEXRsT9Hc2YLRRJg4EfkBYnAbgJ+GEJey71ee62aIWJiNkRMS4
iPgdsAUwGxktq2Uo31hbnAC8Bn8r/ZpKmOLBexiV0K1ReY3NnUil9GGlip3Mi4j+dzJc1T9KkiNik0TbrPPdDt8JIOo+0Nus1wDERcV+Hs2Q985qkrSLiVkgDjYDXOpwnq8MldCuMpDnMW4Wm0BVvrDiSNgHOI82SCfA8sH9E/KtzubJ6HNDNrCl5NsnKJGTWC7lR1My6JenHkpaLiJkRMVPS8pK8CHYv5IBuZo3sVL3cXEQ8j1cr6pUc0M2skf65txIAebGSJbs53jrEvVzMrJHfATdI+g2pcfvzpEZS62XcKGpmDUnaEfgQqYfSdRFxbYezZHU4oJvZIpF0W0SM7nQ+zHXoZrboBnQ6A5Y4oJvZovLP/F7CAd3MrCQc0M1sUanxIdYODuhm1i1Jh0lavptDPtu2zFi3HNDNrJF3AhMkXSJpR0m1K095Fs1ewt0WzayhHMQ/DBwAjAQuAc6OiEc7mjGbj0voZtZQpJLf0/nfLGB54A+SftrRjNl8XEI3s25JOhz4HPAscBZwWUS8Jakf8O+IWLOjGbS5PJeLmTWyErB7RDxevTEi5kj6WIfyZHW4ysXMGrkaeK7yQNIgSZsDRMSDHcuVLcBVLmbWLUl3AyNyPTq5qmViRIzobM6slkvoZtaIoqrkFxFzcHVtr+SAbmaNTJF0uKTF878jgCmdzpQtyAHdzBoZA7wf+A8wDdgcOKijObK6XIduZlYSrgczs25JGgIcCAyjKmZExOc7lSerzwHdzBq5HLgFuB6Y3eG8WDdc5WJm3ZI0KSI26XQ+rDE3ippZI1dK+minM2GNuYRuZt2S9BKwDPAG8BZpQYuIiGU7mjFbgAO6mVlJuFHUzBrKKxatDQyobIuImzuXI6vHAd3MuiXpi8ARwFBgErAFcBuwXSfzZQtyo6iZNXIEMAp4PCK2BTYFnulslqweB3Qza+T1iHgdQNKSEfEQsG6H82R1uMrFzBqZJmk54DLgr5KeB6Z3OE9Wh3u5mFnTJH0QGAyMi4g3O50fm58Dupk1JKk/8A7mn8vlic7lyOpxlYuZdUvSl4EfAP8F5uTNAWzUsUxZXS6hm1m3JE0GNo+IGZ3Oi3XPvVzMrJEngRc7nQlrzFUuZtbIFGC8pKtI87kAEBEndi5LVo8Dupk18kT+t0T+Z72U69DNzErCJXQz65akv5B6tVR7EZgI/KoyitQ6z42iZtbIFOBl4Nf530xSF8Z18mPrJVzlYmbdknRzRGxdb5uk+yNig07lzebnErqZNTJE0uqVB/nvlfJDD//vRVyHbmaNfB24VdKjpOXnhgOHSFoGOK+jObP5uMrFzBqStCTwHlJAf8gNob2Tq1zMrFuSlga+CRwWEZOA1SR9rMPZsjoc0M2skd+Q6spH58fTgGM7lx3rigO6mTWyZkT8FHgLICJeI1W9WC/jgG5mjbwpaSny4CJJa1I1p4v1Hu7lYmaN/AAYR6o7vwDYEti/ozmyutzLxcy6JEnAUOBVYAtSVcvtEfFsRzNmdTmgm1m3JN0ZEe/rdD6sMdehm1kjt0sa1elMWGMuoZtZtyQ9AKwLTAVeIVW7RER4TdFexgHdzLolaY162yPi8XbnxbrnXi5mVpekAcAYYC3gXuDsiJjV2VxZd1xCN7O6JF1MGkx0C7AT8HhEHNHZXFl3HNDNrC5J90bEe/PfiwF3RMSIDmfLuuFeLmbWlbcqf7iqpW9wCd3M6pI0m9SrBVLPlqVIA4wqvVyW7VTerD4HdDOzknCVi5lZSTigm5mVhAO6mVlJOKCbmZXE/wfVggMep39pwQAAAABJRU5ErkJggg==\n",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Grouped bar chart comparing the strict and relaxed F1 columns of `df`,\n",
+    "# with one group of bars per value of the `type` column.\n",
+    "ax = df.plot.bar(\n",
+    "    x='type',\n",
+    "    y=['f1_strict', 'f1_relax'],\n",
+    "    title='Strict and Relax F1_Score of RoBERTa-MIMIC on FRD',\n",
+    "    rot=90,\n",
+    ")\n",
+    "# Blank out the x-axis title; the trailing semicolon hides the cell's text output.\n",
+    "ax.set_xlabel(\"\");"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Export the match details held in `predictions` to three CSV files, one per\n",
+    "# match category (strict / relax / miss), inside {outfolder}/{dataset}_results/.\n",
+    "# Assumes predictions maps NCT_ID -> {'predicted': {category: [entity, ...]}} and\n",
+    "# that every entity is a list of whitespace-separated lines with at least 7\n",
+    "# fields -- TODO confirm against the cell that builds `predictions`.\n",
+    "CSV_HEADER = ['NCT_ID', 'Entity', 'Offsets', 'Golden_Label', 'Prediction']\n",
+    "\n",
+    "results_dir = f'{outfolder}/{dataset}_results'\n",
+    "# Make sure the target folder exists so opening the files for writing cannot\n",
+    "# fail with FileNotFoundError on a fresh run.\n",
+    "os.makedirs(results_dir, exist_ok=True)\n",
+    "\n",
+    "\n",
+    "def open_match_csv(kind):\n",
+    "    \"\"\"Open the '<dataset>_<kind>_match_<model>.csv' result file for writing.\"\"\"\n",
+    "    path = f'{results_dir}/{dataset}_{kind}_match_{model}.csv'\n",
+    "    # newline='' lets the csv module control line endings itself.\n",
+    "    return open(path, 'w', encoding='utf-8', newline='')\n",
+    "\n",
+    "\n",
+    "with open_match_csv('strict') as fstrict, open_match_csv('relax') as frelax, open_match_csv('miss') as fmiss:\n",
+    "    # One writer per match category; the keys mirror the keys found under 'predicted'.\n",
+    "    writers = {\n",
+    "        'strict': csv.writer(fstrict),\n",
+    "        'relax': csv.writer(frelax),\n",
+    "        'miss': csv.writer(fmiss),\n",
+    "    }\n",
+    "    for writer in writers.values():\n",
+    "        writer.writerow(CSV_HEADER)\n",
+    "\n",
+    "    for nct_id, nct in predictions.items():\n",
+    "        # Only the 'predicted' section is exported; other keys of a trial are ignored.\n",
+    "        for match_kind, entities in nct.get('predicted', {}).items():\n",
+    "            writer = writers.get(match_kind)\n",
+    "            if writer is None:\n",
+    "                continue  # category without a CSV of its own\n",
+    "            for ent in entities:\n",
+    "                for line in ent:\n",
+    "                    fields = line.split()\n",
+    "                    # fields[0] -> Entity, fields[1:5] -> Offsets, fields[5] -> Golden_Label,\n",
+    "                    # fields[6] -> Prediction (column names from CSV_HEADER).\n",
+    "                    writer.writerow([nct_id, fields[0], ' '.join(fields[1:5]), fields[5], fields[6]])\n",
+    "                # An empty row separates consecutive entities in the file.\n",
+    "                writer.writerow([])"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}