# [d129b2]: /medicalbert/cliparser.py
# 130 lines (126 with data), 5.6 kB
import argparse
#All the parameters that we can set.
# NB: not all params are used by every classifier.
def setup_parser():
    """Build the CLI argument parser and return the parsed arguments.

    Defines every parameter the experiment runner can accept. NB: not all
    params are used by every classifier. All options default to None (or
    False for flags) so downstream config code can distinguish "unset"
    from an explicit value.

    Returns:
        argparse.Namespace: the parsed command-line arguments.
    """
    parser = argparse.ArgumentParser()

    # --- run modes -----------------------------------------------------
    parser.add_argument("--train_from_checkpoint",
                        default=None,
                        type=str,
                        help="Continue training from a saved model.")
    parser.add_argument("--save_tokenized_text",
                        action='store_true',
                        help="this will output the tokenized process text into a CSV format")
    parser.add_argument("--train",
                        action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--output_embeddings",
                        action='store_true',
                        help="Will take in a classifier and use the underlying model to output the token embeddings")
    parser.add_argument("--eval",
                        action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument("--use_model",
                        default=None,
                        type=str,
                        help="Use this model for evaluations")

    # --- input/output locations ---------------------------------------
    parser.add_argument("--data_dir",
                        default=None,
                        type=str,
                        help="location of input data")
    parser.add_argument("--output_dir",
                        default=None,
                        type=str,
                        help="location of output")
    parser.add_argument("--training_data",
                        default=None,
                        type=str,
                        help="name of training file")
    parser.add_argument("--validation_metric",
                        default=None,
                        type=str,
                        help="metric used to select the best validation checkpoint for testing.")
    parser.add_argument("--valid_data",
                        default=None,
                        type=str,
                        help="name of validation file")
    parser.add_argument("--evaluator",
                        default=None,
                        type=str,
                        help="evaluation class to use")

    # --- experiment setup ---------------------------------------------
    parser.add_argument("--seed",
                        default=None,
                        type=int,
                        help="random seed")
    parser.add_argument("--device",
                        default=None,
                        type=str,
                        help="cpu or cuda")
    parser.add_argument("--experiment_name",
                        default=None,
                        type=str,
                        help="name of the experiment")

    # --- model / training hyper-parameters ----------------------------
    parser.add_argument("--learning_rate",
                        default=None,
                        type=float,
                        help="learning_rate")
    parser.add_argument("--pretrained_model",
                        default=None,
                        type=str,
                        help="pretrained model to train upon.")
    parser.add_argument("--num_sections",
                        default=None,
                        type=int,
                        help="chunks of text")
    parser.add_argument("--tokenizer",
                        default=None,
                        type=str,
                        help="tokenizer model to use")
    parser.add_argument("--num_train_examples",
                        default=None,
                        type=int,
                        help="number of training examples")
    parser.add_argument("--target",
                        default=None,
                        type=str,
                        help="target column")
    parser.add_argument("--classifier",
                        default=None,
                        type=str,
                        help="classifier to use")
    parser.add_argument("--epochs",
                        default=None,
                        type=int,
                        help="Number of epochs to train for")
    parser.add_argument("--train_batch_size",
                        default=None,
                        type=int,
                        help="batch size during training phase")
    parser.add_argument("--gradient_accumulation_steps",
                        default=None,
                        type=int,
                        help="used to reduce GPU memory footprint")
    parser.add_argument("--datareader",
                        default=None,
                        type=str,
                        help="approach to reading the data from files.")
    parser.add_argument("--vocab_size",
                        default=None,
                        type=int,
                        help="Size of vocabulary.")
    # NOTE: help text fixed — it previously duplicated --vocab_size's text.
    parser.add_argument("--embed_size",
                        default=None,
                        type=int,
                        help="Size of embeddings.")
    parser.add_argument("--layer",
                        default=None,
                        type=int,
                        help="If the classifier only uses parts of a model then use this")
    parser.add_argument("--max_sequence_length",
                        default=None,
                        type=int,
                        help="maximum sequence length, each document will be truncated to this length.")
    parser.add_argument("--num_layers",
                        default=None,
                        type=int,
                        help="The number of encoding layers for a BERT model to keep.")

    return parser.parse_args()