Diff of /medicalbert/cliparser.py [000000] .. [d129b2]

Switch to side-by-side view

--- a
+++ b/medicalbert/cliparser.py
@@ -0,0 +1,129 @@
+import argparse
+
+# All the parameters that we can set.
+# NB: not all params are used by every classifier.
def setup_parser():
    """Build the command-line parser and return the parsed arguments.

    NB: not every parameter is used by every classifier; options that the
    user does not supply stay at their default of ``None`` (or ``False``
    for the boolean flags), which lets downstream code detect "not set".

    Returns:
        argparse.Namespace: the parsed command-line arguments.
    """
    parser = argparse.ArgumentParser()

    # Boolean on/off flags: absent => False, present => True.
    flags = [
        ("--save_tokenized_text",
         "this will output the tokenized process text into a CSV format"),
        ("--train",
         "Whether to run training."),
        ("--output_embeddings",
         "Will take in a classifier and use the underlying model to output the token embeddings"),
        ("--eval",
         "Whether to run eval on the dev set."),
    ]
    for name, help_text in flags:
        parser.add_argument(name, action='store_true', help=help_text)

    # Value-taking options as (name, type, help) triples. Driving the
    # add_argument calls from data avoids the copy-paste drift that 27
    # near-identical call sites invite (e.g. --embed_size previously
    # carried --vocab_size's help text).
    options = [
        ("--train_from_checkpoint", str, "Continue training from a saved model."),
        ("--use_model", str, "Use this model for evaluations"),
        ("--data_dir", str, "location of input data"),
        ("--output_dir", str, "location of output"),
        ("--training_data", str, "name of training file"),
        ("--validation_metric", str,
         "metric used to select the best validation checkpoint for testing."),
        ("--valid_data", str, "name of validation file"),
        ("--evaluator", str, "evaluation class to use"),
        ("--seed", int, "random seed"),
        ("--device", str, "cpu or cuda"),
        ("--experiment_name", str, "name of the experiment"),
        ("--learning_rate", float, "learning_rate"),
        ("--pretrained_model", str, "pretrained model to train upon."),
        ("--num_sections", int, "chunks of text"),
        ("--tokenizer", str, "tokenizer model to use"),
        ("--num_train_examples", int, "number of training examples"),
        ("--target", str, "target column"),
        ("--classifier", str, "classifier to use"),
        ("--epochs", int, "Number of epochs to train for"),
        ("--train_batch_size", int, "batch size during training phase"),
        ("--gradient_accumulation_steps", int, "used to reduce GPU memory footprint"),
        ("--datareader", str, "approach to reading the data from files."),
        ("--vocab_size", int, "Size of vocabulary."),
        # Fixed: help text was a copy-paste of --vocab_size's.
        ("--embed_size", int, "Size of the embedding vectors."),
        ("--layer", int, "If the classifier only uses parts of a model then use this"),
        ("--max_sequence_length", int,
         "maximum sequence length, each document will be truncated to this length."),
        ("--num_layers", int, "The number of encoding layers for a BERT model to keep."),
    ]
    for name, arg_type, help_text in options:
        parser.add_argument(name, default=None, type=arg_type, help=help_text)

    return parser.parse_args()