deidentify / Git / [7fc5df] /scripts/training_data

Models:

philipB/

deidentify

Downloads: 1

[7fc5df]: / scripts / training_data_tradeoff.sh

History

Download this file

25 lines (19 with data), 910 Bytes

#!/bin/bash
CORPUS=ons # (ons|i2b2|nursing)

source activate deidentify

# Disable MKL multithreading as it will actually slow down spaCy tokenization
export MKL_NUM_TRHEADS=1
# Specify GPU to run on
export CUDA_VISIBLE_DEVICES=0

# Fraction of training data to use
train_sizes=(0.1 0.25 0.4 0.55 0.7 0.85 1)
# Random seeds for sampling the training data. The number of seeds corresponds to the number of
# repetitions for each training size.
seeds=(42 43 44)

for size in "${train_sizes[@]}"; do
    for seed in "${seeds[@]}"; do
        echo "========= size: $size - seed: $seed ========="

        python deidentify/methods/crf/run_crf_training_sample.py "$CORPUS" subset_training liu_2015 --train_sample_frac="$size" --random_seed="$seed"
        python deidentify/methods/bilstmcrf/run_bilstmcrf_training_sample.py "$CORPUS" subset_training --train_sample_frac="$size" --random_seed="$seed"
    done
done