#!/bin/bash
#
# Use this script to execute all experiments for a single corpus.
# Evaluation output will be in
#   output/evaluation/<CORPUS>/summary{train,dev,test}.csv
#
# Steps, in order: DEDUCE (only for the "ons" corpus), CRF (single run plus
# hyper-parameter search), BiLSTM-CRF (two runs), and finally the evaluation.
# The steps run one after another; a failing step does not stop the
# following ones.

CORPUS=ons # (ons|i2b2|nursing)

# `conda activate` only works once conda's shell functions are loaded, which
# is normally not the case in a non-interactive `bash script.sh` run.
# Load the hook first when conda is available.
if command -v conda >/dev/null 2>&1; then
  eval "$(conda shell.bash hook)"
fi
conda activate deidentify

# Disable MKL multithreading as it will actually slow down spaCy tokenization
export MKL_NUM_THREADS=1

# DEDUCE, only if on "ons" corpus
if [[ "$CORPUS" == "ons" ]]; then
  python deidentify/methods/deduce/run_deduce.py ons-flattened run_1
  python deidentify/methods/deduce/unflatten_deduce_predictions.py ons deduce_run_1
fi

# CRF
python deidentify/methods/crf/run_crf.py "$CORPUS" run_1 liu_2015
python deidentify/methods/crf/run_crf_hyperopt.py "$CORPUS" regularization_rs liu_2015 \
  --n_iter 250 --n_jobs 44

# BiLSTM, should be run on a GPU machine
export CUDA_VISIBLE_DEVICES=0
python deidentify/methods/bilstmcrf/run_bilstmcrf.py "$CORPUS" initial_run \
  --pooled_contextual_embeddings
python deidentify/methods/bilstmcrf/run_bilstmcrf.py "$CORPUS" train_with_dev \
  --train_with_dev --pooled_contextual_embeddings

# Evaluation
python deidentify/evaluation/evaluate_corpus.py "$CORPUS"