|
a |
|
b/medicalbert/cliparser.py |
|
|
1 |
import argparse |
|
|
2 |
|
|
|
3 |
# All the parameters that we can set.
|
|
4 |
# NB: not all params are used by every classifier. |
|
|
5 |
def setup_parser():
    """Build the command-line argument parser and parse sys.argv.

    Every option defaults to ``None`` (flags default to ``False``), so
    callers can distinguish "not supplied" from an explicit value.
    NB: not all params are used by every classifier.

    Returns:
        argparse.Namespace: the parsed command-line arguments.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--train_from_checkpoint",
                        default=None,
                        type=str,
                        help="Continue training from a saved model.")

    # Boolean on/off switches (absent => False).
    _FLAGS = [
        ("--save_tokenized_text",
         "this will output the tokenized process text into a CSV format"),
        ("--train",
         "Whether to run training."),
        ("--output_embeddings",
         "Will take in a classifier and use the underlying model to output the token embeddings"),
        ("--eval",
         "Whether to run eval on the dev set."),
    ]
    for flag, help_text in _FLAGS:
        parser.add_argument(flag, action='store_true', help=help_text)

    # Value-taking options: (name, type, help). All default to None.
    _OPTIONS = [
        ("--use_model", str, "Use this model for evaluations"),
        ("--data_dir", str, "location of input data"),
        ("--output_dir", str, "location of output"),
        ("--training_data", str, "name of training file"),
        ("--validation_metric", str,
         "metric used to select the best validation checkpoint for testing."),
        ("--valid_data", str, "name of validation file"),
        ("--evaluator", str, "evaluation class to use"),
        ("--seed", int, "random seed"),
        ("--device", str, "cpu or cuda"),
        ("--experiment_name", str, "name of the experiment"),
        ("--learning_rate", float, "learning_rate"),
        ("--pretrained_model", str, "pretrained model to train upon."),
        ("--num_sections", int, "chunks of text"),
        ("--tokenizer", str, "tokenizer model to use"),
        ("--num_train_examples", int, "number of training examples"),
        ("--target", str, "target column"),
        ("--classifier", str, "classifier to use"),
        ("--epochs", int, "Number of epochs to train for"),
        ("--train_batch_size", int, "batch size during training phase"),
        ("--gradient_accumulation_steps", int,
         "used to reduce GPU memory footprint"),
        ("--datareader", str, "approach to reading the data from files."),
        ("--vocab_size", int, "Size of vocabulary."),
        # Help text fixed: previously duplicated "Size of vocabulary."
        ("--embed_size", int, "Size of the token embeddings."),
        ("--layer", int,
         "If the classifier only uses parts of a model then use this"),
        ("--max_sequence_length", int,
         "maximum sequence length, each document will be truncated to this length."),
        ("--num_layers", int,
         "The number of encoding layers for a BERT model to keep."),
    ]
    for name, value_type, help_text in _OPTIONS:
        parser.add_argument(name, default=None, type=value_type, help=help_text)

    return parser.parse_args()