--- a
+++ b/Graph_Plotting.py
@@ -0,0 +1,63 @@
+import seaborn as sns
+import matplotlib.pyplot as plt
+import pandas as pd
+import numpy as np
+from Excel_Processing import ProcessSpreadsheets
+
+DATASET_INFO_PATH = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\imageCHD_dataset_info.xlsx"  # absolute, machine-specific path
+SCAN_INFO_PATH = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\imagechd_dataset_image_info.xlsx"  # absolute, machine-specific path
+# Load both spreadsheets through the project helper; the rest of this script indexes the result like a pandas DataFrame.
+dataset_info = ProcessSpreadsheets(DATASET_INFO_PATH, SCAN_INFO_PATH)
+# Columns that are counted further down by testing `== 1`; presumably one flag column per heart-defect type — confirm.
+nan_cols = ["ASD", "VSD", "AVSD", "ToF", "TGA", "CA", "PA", "PDA"]
+
+# Age at acquisition in years for every row (whole days / 365, as before). This is
+# computed column-wise over the whole frame instead of over a hard-coded range(0, 54)
+# of index labels, so it no longer fails with a KeyError on a shorter table or leaves
+# rows past label 53 without an age/category on a longer one.
+scan_dates = pd.to_datetime(dataset_info['AcquisitionDate'])
+birth_dates = pd.to_datetime(dataset_info['PatientBirthDate'])
+dataset_info['AGE'] = (scan_dates - birth_dates).dt.days / 365
+
+# Exclusive upper bound (in years) of every bracket except the open-ended last one.
+age_edges = [1/12, 0.25, 0.5, 0.75, 1, 2, 3, 5, 9, 21]
+age_labels = ['0-1m', '1-3m', '3-6m', '6-9m', '9-12m',
+              '1-2y', '2-3y', '3-5y', '5-9y', '9-21y', '21y+']
+
+# right=False gives left-closed bins [lower, upper), so an age exactly on an edge lands
+# in the older bracket, matching the original `<` / `>=` chain; the -inf/+inf edges keep
+# the first and last brackets open-ended.
+# NOTE: a row with a missing date gets AGE = NaN and is left uncategorised (NaN); in the
+# old chain a NaN age would have fallen through to the '21y+' else-branch.
+dataset_info['AgeCategory'] = pd.cut(
+    dataset_info['AGE'],
+    bins=[-np.inf, *age_edges, np.inf],
+    labels=age_labels,
+    right=False,
+).astype(object)  # plain string labels, as the loop stored
+
+# y = [dataset_info[dataset_info['AGE'] < 1/12].__len__(),
+#      dataset_info[(dataset_info['AGE'] < 0.25) & (dataset_info['AGE'] >= 1/12)].__len__(),
+#      dataset_info[(dataset_info['AGE'] < 0.5) & (dataset_info['AGE'] >= 0.25)].__len__(),
+#      dataset_info[(dataset_info['AGE'] < 0.75) & (dataset_info['AGE'] >= 0.5)].__len__(),
+#      dataset_info[(dataset_info['AGE'] < 1) & (dataset_info['AGE'] >= 0.75)].__len__(),
+#      dataset_info[(dataset_info['AGE'] < 2) & (dataset_info['AGE'] >= 1)].__len__(),
+#      dataset_info[(dataset_info['AGE'] < 3) & (dataset_info['AGE'] >= 2)].__len__(),
+#      dataset_info[(dataset_info['AGE'] < 5) & (dataset_info['AGE'] >= 3)].__len__(),
+#      dataset_info[(dataset_info['AGE'] < 9) & (dataset_info['AGE'] >= 5)].__len__(),
+#      dataset_info[(dataset_info['AGE'] < 21) & (dataset_info['AGE'] >= 9)].__len__(),
+#      dataset_info[(dataset_info['AGE'] >= 21)].__len__()]
+
+# Number of rows flagged (== 1) in each nan_cols column, as a float array like before.
+# Built in one pass rather than regrown with np.append; the plotting code that follows does not read it.
+y = np.array([(dataset_info[col] == 1).sum() for col in nan_cols], dtype=float)
+
+# Grouped bar chart: number of rows per age bracket (youngest first), split by PatientSex.
+plt.figure(figsize=(8, 4.8))
+age_order = ['0-1m', '1-3m', '3-6m', '6-9m', '9-12m', '1-2y', '2-3y', '3-5y', '5-9y', '9-21y', '21y+']
+axes = sns.countplot(data=dataset_info, x='AgeCategory', hue='PatientSex', order=age_order)
+axes.set(xlabel='Patient age', ylabel='Count', title='Pre-processed ImageCHD patient distribution')
+axes.legend(title='Patient sex')
+# Saving to disk is currently disabled; the figure is only displayed.
+# plt.savefig('C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\Graphs\\pre-proc_patient_distribution.png')
+plt.show()
\ No newline at end of file