Switch to side-by-side view

--- a
+++ b/tutorials/mimic_classifier.py
@@ -0,0 +1,39 @@
+ROOT_EHR_DIR = '/lada2/lily/zl379/Year4/EHRTest/EHRKit/tutorials/' # Set this to your root EHRKit directory; the trailing '/' is required because the data paths below are built by string concatenation.
+
+import sys
+import os
+sys.path.append(os.path.dirname(ROOT_EHR_DIR)) # dirname() drops the trailing '/' before the directory is added to the import path
+
+OUTPUT_DATA_PATH = ROOT_EHR_DIR + 'data/output_data/'
+MIMIC_PATH = ROOT_EHR_DIR + 'data/mimic_data/'
+
+
+# NOTE(review): these imports come after the sys.path change above; presumably they depend on it — confirm.
+from mimic_icd9_coding.coding_pipeline import codingPipeline
+from mimic_icd9_coding.utils.mimic_data_preparation import run_mimic_prep
+
+# Run the data preparation step, passing MIMIC_PATH as the MIMIC data location and OUTPUT_DATA_PATH as the output folder.
+run_mimic_prep(output_folder = OUTPUT_DATA_PATH, mimic_data_path= MIMIC_PATH)
+
+print("Building basic tfidf pipeline")
+from sklearn.neural_network import MLPClassifier
+clf = MLPClassifier(hidden_layer_sizes=(100,), max_iter=100, verbose=True)
+# max_iter=100 is used for better results; for a quick first run, lowering it to 10 is enough.
+my_mimic_pipeline = codingPipeline(verbose=True, model=clf, data_path = OUTPUT_DATA_PATH)
+print("Pipeline complete")
+
+# Read the AUROC exposed by the pipeline and print it to two decimal places.
+auroc = my_mimic_pipeline.auroc
+print("Auroc is {:.2f}".format(auroc))
+
+# Load the data into the pipeline explicitly; per the original author, load_data() simply stores the data on the pipeline.
+# It is not loaded automatically because keeping the data around uses more memory.
+my_mimic_pipeline.load_data()
+df = my_mimic_pipeline.data
+
+
+# Predict the ICD9 codes for the TEXT of row 10 and print them next to that row's TARGET for comparison (the original author found the prediction good for this example).
+pred = my_mimic_pipeline.predict(df['TEXT'].iloc[10])
+true = df['TARGET'].iloc[10]
+print("Predicted ICD9 codes: {}".format(pred))
+print("True ICD9 codes: {}".format(true))