[735bb5]: / src / constants / __init__.py

Download this file

145 lines (126 with data), 3.6 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# coding: utf-8
"""
Defining global constants
"""
# Base Dependencies
# -----------------
from enum import Enum
from pathlib import Path
# Constants
# ---------
from .n2c2 import * # N2C2 Dataset Constants
from .ddi import * # DDI Dataset Constants
# Registry of the supported datasets: dataset name -> dataset path constant.
# N2C2_PATH and DDI_PATH are presumably supplied by the star-imports of the
# `.n2c2` and `.ddi` constant modules -- confirm against those modules.
DATASETS_PATHS = {
    "n2c2": N2C2_PATH,
    "ddi": DDI_PATH,
}
# Dataset names, in the insertion order of DATASETS_PATHS.
DATASETS = list(DATASETS_PATHS)
# experiments' random seeds
EXP_RANDOM_SEEDS = [2, 13, 41, 89, 67]

# vocabulary special tokens: every token string is paired with its integer id
PAD_TOKEN = "<pad>"
PAD_ID = 0
BOS_TOKEN = "<s>"
BOS_ID = 1
EOS_TOKEN = "</s>"
EOS_ID = 2
UNK_TOKEN = "<unk>"
UNK_ID = 3

# Word Embeddings
BIOWORD2VEC_PATH = Path("data/bioword2vec/bio_embedding_extrinsic.txt")

# ML MODELS
# Cache directories are relative paths, so they resolve against the current
# working directory of the running process.
CHECKPOINTS_CACHE_DIR = Path("./cache/checkpoints")
MODELS_CACHE_DIR = Path("./cache/models")
# Model family -> {short model name -> model identifier string}.
MODELS = {
    "bert": {
        "clinical-bert": "emilyalsentzer/Bio_ClinicalBERT",
    },
}
# Universal PoS Tagging
# Source: https://github.com/explosion/spaCy/blob/master/spacy/glossary.py
# Tag -> human-readable description. Insertion order matters: U_POS_TAGS
# below is derived from it.
U_POS_GLOSSARY = {
    "ADJ": "adjective",
    "ADP": "adposition",
    "ADV": "adverb",
    "AUX": "auxiliary",
    "CONJ": "conjunction",
    "CCONJ": "coordinating conjunction",
    "DET": "determiner",
    "INTJ": "interjection",
    "NOUN": "noun",
    "NUM": "numeral",
    "PART": "particle",
    "PRON": "pronoun",
    "PROPN": "proper noun",
    "PUNCT": "punctuation",
    "SCONJ": "subordinating conjunction",
    "SYM": "symbol",
    "VERB": "verb",
    "X": "other",
    "EOL": "end of line",
    "SPACE": "space",
}
# All tag names, in the order they are declared in the glossary.
U_POS_TAGS = list(U_POS_GLOSSARY)
# Dependency tagging
# Dependency label -> human-readable description. A value of None marks a
# label for which no description is recorded in this table. Insertion order
# matters: DEP_TAGS below is derived from it.
DEP_GLOSSARY = {
    "ROOT": "root",
    "acl": "clausal modifier of noun (adjectival clause)",
    "acl:relcl": None,
    "acomp": "adjectival complement",
    "advcl": "adverbial clause modifier",
    "advmod": "adverbial modifier",
    "amod": "adjectival modifier",
    "amod@nmod": None,
    "appos": "appositional modifier",
    "attr": "attribute",
    "aux": "auxiliary",
    "auxpass": "auxiliary (passive)",
    "case": "case marking",
    "cc": "coordinating conjunction",
    "cc:preconj": None,
    "ccomp": "clausal complement",
    "compound": "compound",
    "compound:prt": None,
    "conj": "conjunct",
    "cop": "copula",
    "csubj": "clausal subject",
    "dative": "dative",
    "dep": "unclassified dependent",
    "det": "determiner",
    "det:predet": None,
    "dobj": "direct object",
    "expl": "expletive",
    "intj": "interjection",
    "mark": "marker",
    "meta": "meta modifier",
    "mwe": None,
    "neg": "negation modifier",
    "nmod": "modifier of nominal",
    "nmod:npmod": None,
    "nmod:poss": None,
    "nmod:tmod": None,
    "nsubj": "nominal subject",
    "nsubjpass": "nominal subject (passive)",
    "nummod": "numeric modifier",
    "parataxis": "parataxis",
    "pcomp": "complement of preposition",
    "pobj": "object of preposition",
    "preconj": "pre-correlative conjunction",
    "predet": None,
    "prep": "prepositional modifier",
    "punct": "punctuation",
    "quantmod": "modifier of quantifier",
    "xcomp": "open clausal complement",
}
# All dependency labels, in the order they are declared in the glossary.
DEP_TAGS = list(DEP_GLOSSARY)
# BiLSTM Model
# Embedding sizes, listed from largest to smallest.
# NOTE(review): the prefixes look like one per input feature (BIOWV ~ the
# BioWord2Vec vectors, POS / DEP ~ the tag sets defined in this module, IOB,
# RD) -- confirm the exact meaning of each against the model code.
BIOWV_EMB_DIM = 200
RD_EMB_DIM = 25
POS_EMB_DIM = 20
DEP_EMB_DIM = 20
IOB_EMB_DIM = 5
# Active Learning Strategies
class BaalQueryStrategy(Enum):
    """Active-learning query strategies grouped under the ``Baal`` prefix.

    Each member's value is the string identifier of the strategy, so a
    member can be looked up from that string, e.g.
    ``BaalQueryStrategy("batch_bald")``.

    NOTE(review): the prefix presumably refers to strategies run through the
    ``baal`` library -- confirm against the callers.
    """

    RANDOM = "random"
    LC = "least_confidence"
    BATCH_BALD = "batch_bald"
class RFQueryStrategy(Enum):
    """Active-learning query strategies grouped under the ``RF`` prefix.

    Each member's value is the string identifier of the strategy, so a
    member can be looked up from that string, e.g.
    ``RFQueryStrategy("least_confidence")``.

    ``BATCH_LC`` keeps its historical, misspelt value
    ``"bach_least_confidence"`` so that anything already relying on that
    exact string keeps working. The correctly spelt
    ``"batch_least_confidence"`` is accepted as an alias on lookup.

    NOTE(review): the prefix presumably stands for Random Forest -- confirm
    against the callers.
    """

    RANDOM = "random"
    LC = "least_confidence"
    # Misspelling ("bach") kept on purpose for backward compatibility.
    BATCH_LC = "bach_least_confidence"

    @classmethod
    def _missing_(cls, value):
        """Resolve the correctly spelt alias of ``BATCH_LC``.

        Called by ``Enum`` only when ``value`` matches no member value.
        Returning ``None`` lets ``Enum`` raise its usual ``ValueError``.
        """
        if value == "batch_least_confidence":
            return cls.BATCH_LC
        return None
# Methods
# Short method identifier -> display name of the method.
METHODS_NAMES = {
    "rf": "Random Forest",
    "bilstm": "BiLSTM",
    "bert": "Clinical BERT",
    "bert-pairs": "Paired Clinical BERT",
}