Switch to unified view

a b/scripts/training_data_tradeoff.sh
1
#!/bin/bash
# Environment setup for the training-data trade-off runs in this script:
# selects the corpus, activates the conda environment, and exports the
# runtime settings that the Python training processes inherit.

# Corpus handed to every training command as its first argument.
CORPUS=ons # (ons|i2b2|nursing)

source activate deidentify

# Disable MKL multithreading as it will actually slow down spaCy tokenization
# (the variable was previously misspelled "MKL_NUM_TRHEADS", so the setting
# never took effect).
export MKL_NUM_THREADS=1
# Specify GPU to run on
export CUDA_VISIBLE_DEVICES=0
10
11
# Fractions of the training data to sample, in the order they are run.
train_sizes=(0.1 0.25 0.4 0.55 0.7 0.85 1)
# Random seeds used when sampling the training data; every fraction is
# repeated once per seed, so the number of seeds is the number of repetitions.
seeds=(42 43 44)

# Run both training scripts for a single (fraction, seed) combination.
# Arguments: $1 - training sample fraction, $2 - random seed
# Globals:   CORPUS (read)
run_training_sample() {
    local frac=$1
    local rng_seed=$2
    # Sampling options shared by the CRF and BiLSTM-CRF invocations.
    local -a sample_opts=(
        --train_sample_frac="$frac"
        --random_seed="$rng_seed"
    )

    echo "========= size: $frac - seed: $rng_seed ========="

    python deidentify/methods/crf/run_crf_training_sample.py \
        "$CORPUS" subset_training liu_2015 "${sample_opts[@]}"
    python deidentify/methods/bilstmcrf/run_bilstmcrf_training_sample.py \
        "$CORPUS" subset_training "${sample_opts[@]}"
}

# Outer loop over fractions, inner loop over seeds: all repetitions of one
# fraction finish before the next fraction starts.
for frac in "${train_sizes[@]}"; do
    for rng_seed in "${seeds[@]}"; do
        run_training_sample "$frac" "$rng_seed"
    done
done