import argparse

# All the parameters that we can set.
# NB: not all params are used by every classifier.
def setup_parser(argv=None):
    """Build the CLI argument parser and parse arguments.

    Args:
        argv: Optional list of argument strings. Defaults to None, in which
            case argparse falls back to ``sys.argv[1:]`` (backward-compatible
            with the previous no-argument behavior).

    Returns:
        argparse.Namespace with all recognized options; unset options are None
        (or False for the store_true flags).
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--train_from_checkpoint",
                        default=None,
                        type=str,
                        help="Continue training from a saved model.")

    # Action flags (store_true): default to False when absent.
    parser.add_argument("--save_tokenized_text",
                        action='store_true',
                        help="this will output the tokenized process text into a CSV format")
    parser.add_argument("--train",
                        action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--output_embeddings",
                        action='store_true',
                        help="Will take in a classifier and use the underlying model to output the token embeddings")
    parser.add_argument("--eval",
                        action='store_true',
                        help="Whether to run eval on the dev set.")

    # Paths / data selection.
    parser.add_argument("--use_model",
                        default=None,
                        type=str,
                        help="Use this model for evaluations")
    parser.add_argument("--data_dir",
                        default=None,
                        type=str,
                        help="location of input data")
    parser.add_argument("--output_dir",
                        default=None,
                        type=str,
                        help="location of output")
    parser.add_argument("--training_data",
                        default=None,
                        type=str,
                        help="name of training file")
    parser.add_argument("--validation_metric",
                        default=None,
                        type=str,
                        help="metric used to select the best validation checkpoint for testing.")
    parser.add_argument("--valid_data",
                        default=None,
                        type=str,
                        help="name of validation file")
    parser.add_argument("--evaluator",
                        default=None,
                        type=str,
                        help="evaluation class to use")

    # Run configuration.
    parser.add_argument("--seed",
                        default=None,
                        type=int,
                        help="random seed")
    parser.add_argument("--device",
                        default=None,
                        type=str,
                        help="cpu or cuda")
    parser.add_argument("--experiment_name",
                        default=None,
                        type=str,
                        help="name of the experiment")
    parser.add_argument("--learning_rate",
                        default=None,
                        type=float,
                        help="learning_rate")

    # Model / tokenizer selection.
    parser.add_argument("--pretrained_model",
                        default=None,
                        type=str,
                        help="pretrained model to train upon.")
    parser.add_argument("--num_sections",
                        default=None,
                        type=int,
                        help="chunks of text")
    parser.add_argument("--tokenizer",
                        default=None,
                        type=str,
                        help="tokenizer model to use")
    parser.add_argument("--num_train_examples",
                        default=None,
                        type=int,
                        help="number of training examples")
    parser.add_argument("--target",
                        default=None,
                        type=str,
                        help="target column")
    parser.add_argument("--classifier",
                        default=None,
                        type=str,
                        help="classifier to use")

    # Training hyper-parameters.
    parser.add_argument("--epochs",
                        default=None,
                        type=int,
                        help="Number of epochs to train for")
    parser.add_argument("--train_batch_size",
                        default=None,
                        type=int,
                        help="batch size during training phase")
    parser.add_argument("--gradient_accumulation_steps",
                        default=None,
                        type=int,
                        help="used to reduce GPU memory footprint")
    parser.add_argument("--datareader",
                        default=None,
                        type=str,
                        help="approach to reading the data from files.")
    parser.add_argument("--vocab_size",
                        default=None,
                        type=int,
                        help="Size of vocabulary.")
    # Fixed copy-paste error: help previously duplicated the --vocab_size text.
    parser.add_argument("--embed_size",
                        default=None,
                        type=int,
                        help="Size of the embedding vectors.")
    parser.add_argument("--layer",
                        default=None,
                        type=int,
                        help="If the classifier only uses parts of a model then use this")
    parser.add_argument("--max_sequence_length",
                        default=None,
                        type=int,
                        help="maximum sequence length, each document will be truncated to this length.")
    parser.add_argument("--num_layers",
                        default=None,
                        type=int,
                        help="The number of encoding layers for a BERT model to keep.")
    return parser.parse_args(argv)