Diff of /src/constants/n2c2.py [000000] .. [735bb5]

Switch to unified view

a b/src/constants/n2c2.py
1
# coding: utf-8
2
3
# Base Dependencies
4
# -----------------
5
import re
6
from pathlib import Path
7
from os.path import join as pjoin
8
9
10
# Root directory of the n2c2 data, relative to the working directory.
N2C2_PATH = Path("data/n2c2")

# Vocabulary file; derived from N2C2_PATH so the two locations cannot drift
# apart (resolves to data/n2c2/vocab/vocab.txt).
N2C2_VOCAB_PATH = N2C2_PATH / "vocab" / "vocab.txt"
13
# Entity labels for the n2c2 data: "Drug" first, followed by the eight
# labels that also appear in N2C2_ATTR_TYPES (same relative order).
N2C2_ENTITY_TYPES = [
    "Drug",
    "Strength",
    "Duration",
    "Route",
    "Form",
    "ADE",
    "Dosage",
    "Reason",
    "Frequency",
]
24
# Attribute labels: every entry of N2C2_ENTITY_TYPES except "Drug",
# kept in the same order.
N2C2_ATTR_TYPES = [
    "Strength",
    "Duration",
    "Route",
    "Form",
    "ADE",
    "Dosage",
    "Reason",
    "Frequency",
]
34
35
# Relation labels of the form "<attribute>-Drug", one per entry of
# N2C2_ATTR_TYPES and in the same order.
N2C2_REL_TYPES = [
    "Strength-Drug",
    "Duration-Drug",
    "Route-Drug",
    "Form-Drug",
    "ADE-Drug",
    "Dosage-Drug",
    "Reason-Drug",
    "Frequency-Drug",
]
45
46
# Eight fractions over a common denominator of 41086; the numerators add up
# to 41086, so the weights sum to 1.
# NOTE(review): presumably each entry is the share of the matching
# N2C2_REL_TYPES label in the test split (same order) - confirm with callers.
N2C2_REL_TEST_WEIGHTS = [
    10255 / 41086,
    568 / 41086,
    6784 / 41086,
    5382 / 41086,
    981 / 41086,
    3563 / 41086,
    4335 / 41086,
    9218 / 41086,
]
56
57
# Maps each attribute label to the list of entity labels it may be paired
# with; every attribute currently lists only "Drug".
N2C2_ATTR_ENTITY_CANDIDATES = {
    "Strength": ["Drug"],
    "Duration": ["Drug"],
    "Route": ["Drug"],
    "Form": ["Drug"],
    "ADE": ["Drug"],
    "Dosage": ["Drug"],
    "Reason": ["Drug"],
    "Frequency": ["Drug"],
}
67
68
# Locations below are plain strings (unlike N2C2_PATH, which is a Path);
# the type is kept as-is so existing callers are unaffected.
N2C2_SPLITS_DIR = "data/n2c2/splits"
N2C2_HF_TRAIN_PATH = "data/n2c2/train.hf"
N2C2_HF_TEST_PATH = "data/n2c2/test.hf"
71
72
73
# Compiled regexes keyed by placeholder category. Every pattern targets a
# bracketed "[** ... **]" token; the keyword (or digit layout) inside the
# brackets decides the category.
# NOTE(review): these look like de-identification placeholders in the
# clinical notes - confirm against the code that consumes this mapping.
N2C2_ANNONYM_PATTERNS = {
    # "[**d-d**]" followed by optional whitespace and a literal "PM".
    # NOTE(review): there is no "AM" alternative - confirm that is intended.
    # Such a token also satisfies the numeric branch of "date" below, so the
    # order in which callers apply the patterns presumably matters.
    "hour": re.compile(r"\[\*\*\d+-\d+\*\*\]\s*PM"),
    # Either a token starting with Date/Month/Year, or a purely numeric
    # "d-d" / "d-d-d" token.
    "date": re.compile(
        r"(\[\*\*(Date|Month|Year)[^\*]*\*\*\])|(\[\*\*\d+-\d+-?\d*\*\*\])"
    ),
    "hospital": re.compile(r"(\[\*\*[^\*]*Hospital[^\*]*\*\*\])"),
    "name": re.compile(r"(\[\*\*[^\*]*(Name|name)[^\*]*\*\*\])"),
    "telephone": re.compile(r"(\[\*\*[^\*]*(Telephone|telephone)[^\*]*\*\*\])"),
    # NOTE(review): the third alternative matches digits, a literal "-/",
    # then digits (e.g. "5-/2012"); verify "-/" was not meant to be "[-/]".
    "location": re.compile(r"(\[\*\*[^\*]*(Location|location|\d+-/\d+)[^\*]*\*\*\])"),
    "address": re.compile(r"(\[\*\*[^\*]*(Address|address|Country|State)[^\*]*\*\*\])"),
    "age": re.compile(r"(\[\*\*[^\*]*(Age)[^\*]*\*\*\])"),
    # Either a token containing Number / Numeric Identifier / number, or a
    # token made of digits only.
    "number": re.compile(
        r"(\[\*\*[^\*]*(Number|Numeric Identifier|number)[^\*]*\*\*\])|(\[\*\*\d+\*\*\])"
    ),
}
88
89
# Tag set in IOB form: "O" first, then a "B-<label>" / "I-<label>" pair for
# each entry of N2C2_ENTITY_TYPES, in that list's order. Kept as an explicit
# literal so the position of every tag is visible at a glance.
N2C2_IOB_TAGS = [
    "O",
    "B-Drug",
    "I-Drug",
    "B-Strength",
    "I-Strength",
    "B-Duration",
    "I-Duration",
    "B-Route",
    "I-Route",
    "B-Form",
    "I-Form",
    "B-ADE",
    "I-ADE",
    "B-Dosage",
    "I-Dosage",
    "B-Reason",
    "I-Reason",
    "B-Frequency",
    "I-Frequency",
]
110
111
# Upper bound used for "RD" values in n2c2 processing.
# NOTE(review): "RD" is not defined in this module (possibly a relative
# distance between entities) - confirm the meaning and unit with callers.
N2C2_RD_MAX = 30