Diff of /Graph_Plotting.py [000000] .. [b52eda]

Switch to unified view

a b/Graph_Plotting.py
1
import seaborn as sns
2
import matplotlib.pyplot as plt
3
import pandas as pd
4
import numpy as np
5
from Excel_Processing import ProcessSpreadsheets
6
7
# Columns of the loaded table that are tallied further down (rows whose
# value equals 1 are counted per column).
# NOTE(review): these look like congenital-heart-defect label columns - confirm.
nan_cols = [
    "ASD", "VSD", "AVSD", "ToF",
    "TGA", "CA", "PA", "PDA",
]

# Absolute, machine-specific locations of the two ImageCHD spreadsheets.
DATASET_INFO_PATH = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\imageCHD_dataset_info.xlsx"
SCAN_INFO_PATH = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\imagechd_dataset_image_info.xlsx"

# Load the table used by the rest of the script. The result is indexed with
# .loc and handed to seaborn as `data=` below, so it is expected to behave
# like a pandas DataFrame.
dataset_info = ProcessSpreadsheets(
    DATASET_INFO_PATH,
    SCAN_INFO_PATH,
)
13
14
# Derive each patient's age at scan time ('AGE', in years) and the age band
# it falls into ('AgeCategory').
#
# The whole table is processed in one pass instead of looping over a fixed
# number of rows, so this keeps working when the spreadsheets gain or lose
# rows or when the index is not 0..N-1.
# NOTE(review): the previous loop only filled the rows labelled 0..53;
# this assumes that was the full table - confirm.

# Band edges in years. Bands are left-closed: band k covers
# edge[k] <= age < edge[k + 1]. The infinite outer edges make the first band
# catch everything below one month and the last band everything from 21 up.
AGE_BIN_EDGES = [-np.inf, 1/12, 0.25, 0.5, 0.75, 1, 2, 3, 5, 9, 21, np.inf]
AGE_BIN_LABELS = ['0-1m', '1-3m', '3-6m', '6-9m', '9-12m', '1-2y',
                  '2-3y', '3-5y', '5-9y', '9-21y', '21y+']

# pd.to_datetime leaves datetime64 columns as they are and also accepts
# object columns holding date/datetime values, so the subtraction below
# always yields a timedelta column that supports the .dt accessor.
_scan_dates = pd.to_datetime(dataset_info['AcquisitionDate'])
_birth_dates = pd.to_datetime(dataset_info['PatientBirthDate'])

# Age in years from whole elapsed days, using a 365-day year.
dataset_info['AGE'] = (_scan_dates - _birth_dates).dt.days / 365

# right=False selects "lower <= age < upper", matching the band definitions
# above. A missing age stays missing rather than being counted as '21y+'.
# The result is stored as plain strings (object dtype).
dataset_info['AgeCategory'] = pd.cut(
    dataset_info['AGE'],
    bins=AGE_BIN_EDGES,
    labels=AGE_BIN_LABELS,
    right=False,
).astype(object)
38
39
# y = [dataset_info[dataset_info['AGE'] < 1/12].__len__(),
40
#      dataset_info[(dataset_info['AGE'] < 0.25) & (dataset_info['AGE'] >= 1/12)].__len__(),
41
#      dataset_info[(dataset_info['AGE'] < 0.5) & (dataset_info['AGE'] >= 0.25)].__len__(),
42
#      dataset_info[(dataset_info['AGE'] < 0.75) & (dataset_info['AGE'] >= 0.5)].__len__(),
43
#      dataset_info[(dataset_info['AGE'] < 1) & (dataset_info['AGE'] >= 0.75)].__len__(),
44
#      dataset_info[(dataset_info['AGE'] < 2) & (dataset_info['AGE'] >= 1)].__len__(),
45
#      dataset_info[(dataset_info['AGE'] < 3) & (dataset_info['AGE'] >= 2)].__len__(),
46
#      dataset_info[(dataset_info['AGE'] < 5) & (dataset_info['AGE'] >= 3)].__len__(),
47
#      dataset_info[(dataset_info['AGE'] < 9) & (dataset_info['AGE'] >= 5)].__len__(),
48
#      dataset_info[(dataset_info['AGE'] < 21) & (dataset_info['AGE'] >= 9)].__len__(),
49
#      dataset_info[(dataset_info['AGE'] >= 21)].__len__()]
50
51
# For every column listed in nan_cols, count the rows whose value equals 1.
# The counts are stored as floats, in the same order as nan_cols.
y = np.array(
    [len(dataset_info[dataset_info[label] == 1]) for label in nan_cols],
    dtype=float,
)
54
55
# Grouped bar chart: number of rows per age band, split by PatientSex.

# Left-to-right order of the bars on the x axis (youngest band first).
age_band_order = [
    '0-1m', '1-3m', '3-6m', '6-9m', '9-12m',
    '1-2y', '2-3y', '3-5y', '5-9y', '9-21y', '21y+',
]

plt.figure(figsize=(8, 4.8))
# countplot draws on the current axes and returns them, so the labels below
# are applied to that same plot.
ax = sns.countplot(
    x='AgeCategory',
    hue='PatientSex',
    order=age_band_order,
    data=dataset_info,
)
ax.set_xlabel('Patient age')
ax.set_ylabel('Count')
ax.set_title('Pre-processed ImageCHD patient distribution')
ax.legend(title='Patient sex')
# To write the figure to disk, uncomment:
# plt.savefig('C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\Graphs\\pre-proc_patient_distribution.png')
plt.show()