|
a |
|
b/Makefile |
|
|
# ======Generate data variables========
# Arguments forwarded to generate_data.py by the `generate-data` target.
# Any of them can be overridden per invocation, e.g.
#   make generate-data max_seq_len=256
# Values are plain literals, so simple (:=) assignment is used. Do not add
# trailing comments on the assignment lines: trailing whitespace would become
# part of the value.

# --task
task := re
# --input_dir
input_dir := data/
# --ade_dir
ade_dir := ade_corpus/
# --target_dir (path is relative to the directory make is run from)
target_dir := biobert_re/dataset/
# --max_seq_len
max_seq_len := 128
# --dev_split
dev_split := 0.1
# --tokenizer
tokenizer := biobert-base
# --ext
file_ext := tsv
# --sep
sep := tab

# ========BioBERT NER training variables========
# Arguments forwarded to run_ner.py by the `train-biobert-ner` target.
# That recipe first changes into biobert_ner/, so the relative paths below
# are resolved from inside that directory.

# --output_dir
ner_biobert_save_dir := ./output
# --data_dir, and the directory that holds labels.txt (--labels)
ner_biobert_data_dir := ./dataset
# --model_name_or_path
ner_biobert_model_name := dmis-lab/biobert-large-cased-v1.1
# --max_seq_length
ner_biobert_max_len := 128
# --per_device_train_batch_size
ner_biobert_batch_size := 8
# --num_train_epochs
ner_biobert_epochs := 1
# --save_steps
ner_biobert_save_steps := 4000
# --seed
ner_biobert_seed := 0

# ========BioBERT RE training variables========
# Arguments forwarded to run_re.py by the `train-biobert-re` target.
# That recipe first changes into biobert_re/, so the relative paths below
# are resolved from inside that directory.

# --output_dir
re_biobert_save_dir := ./output
# --data_dir
re_biobert_data_dir := ./dataset
# --model_name_or_path
re_biobert_model_name := dmis-lab/biobert-base-cased-v1.1
# --config_name
re_biobert_config_name := bert-base-cased
# --max_seq_length
re_biobert_max_len := 128
# --per_device_train_batch_size
re_biobert_batch_size := 8
# --num_train_epochs
re_biobert_epochs := 3
# --save_steps
re_biobert_save_steps := 6264
# --seed
re_biobert_seed := 1
# --learning_rate
re_biobert_lr := 5e-5

# ========FastAPI========
# Python module name (without .py) that exposes the ASGI object `app`;
# the API targets reference it as <module>:app.
fast_api_fname := fast_api


# Generates data: runs generate_data.py with the "Generate data variables"
# defined at the top of this file. Declared phony because the target is a
# command name, not a file this rule creates.
.PHONY: generate-data
generate-data:
	python generate_data.py \
		--task ${task} \
		--input_dir ${input_dir} \
		--ade_dir ${ade_dir} \
		--target_dir ${target_dir} \
		--max_seq_len ${max_seq_len} \
		--dev_split ${dev_split} \
		--tokenizer ${tokenizer} \
		--ext ${file_ext} \
		--sep ${sep}

# Trains BioBERT NER model: runs run_ner.py from inside biobert_ner/ with
# training, evaluation and prediction enabled, overwriting the output dir.
# The `cd` is chained with && on the same logical line because every recipe
# line runs in its own shell. Declared phony: the target is a command name.
.PHONY: train-biobert-ner
train-biobert-ner:
	cd biobert_ner/ && \
	python run_ner.py \
		--data_dir ${ner_biobert_data_dir}/ \
		--labels ${ner_biobert_data_dir}/labels.txt \
		--model_name_or_path ${ner_biobert_model_name} \
		--output_dir ${ner_biobert_save_dir}/ \
		--max_seq_length ${ner_biobert_max_len} \
		--num_train_epochs ${ner_biobert_epochs} \
		--per_device_train_batch_size ${ner_biobert_batch_size} \
		--save_steps ${ner_biobert_save_steps} \
		--seed ${ner_biobert_seed} \
		--do_train \
		--do_eval \
		--do_predict \
		--overwrite_output_dir

# Trains the BiLSTM NER model: runs train.py from inside bilstm_crf_ner.
# `cd` and the command share one logical line (&&) because each recipe line
# runs in its own shell. Declared phony: the target is a command name.
.PHONY: train-bilstm
train-bilstm:
	cd bilstm_crf_ner && \
	python train.py

# Trains BioBERT RE model: runs run_re.py from inside biobert_re/ for the
# `ehr-re` task with training, evaluation and prediction enabled, overwriting
# the output dir. The `cd` is chained with && because every recipe line runs
# in its own shell. Declared phony: the target is a command name.
.PHONY: train-biobert-re
train-biobert-re:
	cd biobert_re/ && \
	python run_re.py \
		--task_name ehr-re \
		--config_name ${re_biobert_config_name} \
		--data_dir ${re_biobert_data_dir} \
		--model_name_or_path ${re_biobert_model_name} \
		--max_seq_length ${re_biobert_max_len} \
		--num_train_epochs ${re_biobert_epochs} \
		--per_device_train_batch_size ${re_biobert_batch_size} \
		--save_steps ${re_biobert_save_steps} \
		--seed ${re_biobert_seed} \
		--do_train \
		--do_eval \
		--do_predict \
		--learning_rate ${re_biobert_lr} \
		--output_dir ${re_biobert_save_dir} \
		--overwrite_output_dir

# Starts the FastAPI server locally under uvicorn with --reload (auto-reload
# on code changes; intended for development). Serves the `app` object from
# the module named by fast_api_fname. Declared phony: the target is a
# command name, not a file.
.PHONY: start-api-local
start-api-local:
	uvicorn ${fast_api_fname}:app --reload

# Starts api on GCP: launches gunicorn as a background daemon bound to
# 0.0.0.0:8000 with 4 uvicorn workers and a 300 s worker timeout.
# The app module comes from fast_api_fname (default: fast_api) so this target
# stays in sync with start-api-local instead of hard-coding the module name.
# Declared phony: the target is a command name, not a file.
.PHONY: start-api-gcp
start-api-gcp:
	gunicorn -b 0.0.0.0:8000 -w 4 -k uvicorn.workers.UvicornWorker ${fast_api_fname}:app --timeout 300 --daemon