a b/Makefile
1
# ======== Data-generation settings ========
# Each value below is forwarded as a command-line flag to generate_data.py
# by the `generate-data` target. Override any of them per invocation, e.g.
#   make generate-data task=re max_seq_len=256
# Values are plain literals, so simple (`:=`) assignment is used.

# --task
task        := re
# --input_dir / --ade_dir / --target_dir
input_dir   := data/
ade_dir     := ade_corpus/
target_dir  := biobert_re/dataset/
# --max_seq_len / --dev_split
max_seq_len := 128
dev_split   := 0.1
# --tokenizer
tokenizer   := biobert-base
# --ext / --sep
file_ext    := tsv
sep         := tab
11
12
# ======== BioBERT NER training settings ========
# Consumed by the `train-biobert-ner` target, which changes into
# biobert_ner/ before running, so the relative paths below resolve
# inside that directory.

# --output_dir
ner_biobert_save_dir   := ./output
# --data_dir (labels.txt is also read from this directory)
ner_biobert_data_dir   := ./dataset
# --model_name_or_path
ner_biobert_model_name := dmis-lab/biobert-large-cased-v1.1
# --max_seq_length
ner_biobert_max_len    := 128
# --per_device_train_batch_size
ner_biobert_batch_size := 8
# --num_train_epochs
ner_biobert_epochs     := 1
# --save_steps
ner_biobert_save_steps := 4000
# --seed
ner_biobert_seed       := 0
21
22
# ======== BioBERT RE training settings ========
# Consumed by the `train-biobert-re` target, which changes into
# biobert_re/ before running, so the relative paths below resolve
# inside that directory.

# --output_dir
re_biobert_save_dir    := ./output
# --data_dir
re_biobert_data_dir    := ./dataset
# --model_name_or_path
re_biobert_model_name  := dmis-lab/biobert-base-cased-v1.1
# --config_name
re_biobert_config_name := bert-base-cased
# --max_seq_length
re_biobert_max_len     := 128
# --per_device_train_batch_size
re_biobert_batch_size  := 8
# --num_train_epochs
re_biobert_epochs      := 3
# --save_steps
re_biobert_save_steps  := 6264
# --seed
re_biobert_seed        := 1
# --learning_rate
re_biobert_lr          := 5e-5
33
34
# ======== FastAPI settings ========
# Python module (without the .py suffix) whose `app` object is served;
# used to build the `<module>:app` argument for the API targets.
fast_api_fname := fast_api
36
37
38
# Generates the training data by running generate_data.py with the
# data-generation settings defined at the top of this file. Every flag
# value can be overridden on the command line, e.g.
#   make generate-data task=re dev_split=0.2
#
# Declared phony because the target names a command, not a file it
# produces; without this, a file or directory called `generate-data`
# would make the target look up to date and skip the recipe.
.PHONY: generate-data
generate-data:
	python generate_data.py \
		--task $(task) \
		--input_dir $(input_dir) \
		--ade_dir $(ade_dir) \
		--target_dir $(target_dir) \
		--max_seq_len $(max_seq_len) \
		--dev_split $(dev_split) \
		--tokenizer $(tokenizer) \
		--ext $(file_ext) \
		--sep $(sep)
50
51
# Trains the BioBERT NER model by running run_ner.py from inside
# biobert_ner/, with training, evaluation and prediction all enabled.
#
# The `cd` and the python command are joined with `&&` on one logical
# line because each recipe line runs in its own shell; the relative
# data/output paths therefore resolve inside biobert_ner/.
# --overwrite_output_dir is passed, so existing contents of the output
# directory may be replaced.
#
# Declared phony because the target names a command, not an output file.
.PHONY: train-biobert-ner
train-biobert-ner:
	cd biobert_ner/ && \
	python run_ner.py \
		--data_dir $(ner_biobert_data_dir)/ \
		--labels $(ner_biobert_data_dir)/labels.txt \
		--model_name_or_path $(ner_biobert_model_name) \
		--output_dir $(ner_biobert_save_dir)/ \
		--max_seq_length $(ner_biobert_max_len) \
		--num_train_epochs $(ner_biobert_epochs) \
		--per_device_train_batch_size $(ner_biobert_batch_size) \
		--save_steps $(ner_biobert_save_steps) \
		--seed $(ner_biobert_seed) \
		--do_train \
		--do_eval \
		--do_predict \
		--overwrite_output_dir
68
69
# Trains the BiLSTM NER model by running train.py from inside
# bilstm_crf_ner/. No flags are passed from this Makefile.
#
# `cd` and `python` are chained with `&&` so they share one shell and
# the script is skipped if the directory change fails.
#
# Declared phony because the target names a command, not an output file.
.PHONY: train-bilstm
train-bilstm:
	cd bilstm_crf_ner && python train.py
73
74
# Trains the BioBERT RE model by running run_re.py from inside
# biobert_re/ with the fixed task name `ehr-re`, and with training,
# evaluation and prediction all enabled.
#
# The `cd` and the python command are joined with `&&` on one logical
# line because each recipe line runs in its own shell; the relative
# data/output paths therefore resolve inside biobert_re/.
# --overwrite_output_dir is passed, so existing contents of the output
# directory may be replaced.
#
# Declared phony because the target names a command, not an output file.
.PHONY: train-biobert-re
train-biobert-re:
	cd biobert_re/ && \
	python run_re.py \
		--task_name ehr-re \
		--config_name $(re_biobert_config_name) \
		--data_dir $(re_biobert_data_dir) \
		--model_name_or_path $(re_biobert_model_name) \
		--max_seq_length $(re_biobert_max_len) \
		--num_train_epochs $(re_biobert_epochs) \
		--per_device_train_batch_size $(re_biobert_batch_size) \
		--save_steps $(re_biobert_save_steps) \
		--seed $(re_biobert_seed) \
		--do_train \
		--do_eval \
		--do_predict \
		--learning_rate $(re_biobert_lr) \
		--output_dir $(re_biobert_save_dir) \
		--overwrite_output_dir
93
94
# Starts the FastAPI app locally under uvicorn with --reload (the server
# restarts automatically when source files change), for development use.
# The served module is taken from fast_api_fname; its `app` object is the
# ASGI application.
#
# Declared phony because the target names a command, not an output file.
.PHONY: start-api-local
start-api-local:
	uvicorn $(fast_api_fname):app --reload
97
98
# Starts the API for the GCP deployment: gunicorn with 4 uvicorn workers,
# bound to 0.0.0.0:8000, a 300 s worker timeout, detached via --daemon.
#
# The module name comes from fast_api_fname (as in `start-api-local`)
# rather than being hard-coded, so changing that variable moves both
# targets to the new module together.
#
# Declared phony because the target names a command, not an output file.
.PHONY: start-api-gcp
start-api-gcp:
	gunicorn -b 0.0.0.0:8000 -w 4 -k uvicorn.workers.UvicornWorker \
		$(fast_api_fname):app --timeout 300 --daemon