|
a |
|
b/Graph_Plotting.py |
|
|
1 |
import seaborn as sns |
|
|
2 |
import matplotlib.pyplot as plt |
|
|
3 |
import pandas as pd |
|
|
4 |
import numpy as np |
|
|
5 |
from Excel_Processing import ProcessSpreadsheets |
|
|
6 |
|
|
|
7 |
# Absolute, machine-specific locations of the two ImageCHD spreadsheets.
DATASET_INFO_PATH = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\imageCHD_dataset_info.xlsx"
SCAN_INFO_PATH = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\imagechd_dataset_image_info.xlsx"

# Table produced by ProcessSpreadsheets from both spreadsheets.
# NOTE(review): the script indexes it with .loc and column labels, so it is
# presumably a pandas DataFrame -- confirm against Excel_Processing.
dataset_info = ProcessSpreadsheets(
    DATASET_INFO_PATH,
    SCAN_INFO_PATH,
)

# Column labels of dataset_info whose rows equal to 1 are counted later on.
nan_cols = [
    "ASD",
    "VSD",
    "AVSD",
    "ToF",
    "TGA",
    "CA",
    "PA",
    "PDA",
]
|
|
13 |
|
|
|
14 |
# Derive each patient's age at acquisition (in years) and bucket it into the
# age bands used on the plot's x-axis.
#
# Differences from the previous row-by-row loop:
#   * every row of dataset_info is processed, not only index labels 0-53
#     (NOTE(review): confirm the 54-row limit was not intentional);
#   * a row with a missing date keeps a missing AgeCategory instead of
#     falling through every comparison and being labelled '21y+'.
_AGE_BIN_EDGES = [-np.inf, 1 / 12, 0.25, 0.5, 0.75, 1, 2, 3, 5, 9, 21, np.inf]
_AGE_LABELS = ['0-1m', '1-3m', '3-6m', '6-9m', '9-12m',
               '1-2y', '2-3y', '3-5y', '5-9y', '9-21y', '21y+']

# pd.to_datetime leaves datetime columns untouched and guarantees the
# subtraction yields a timedelta Series that exposes `.dt.days`.
_time_since_birth = (
    pd.to_datetime(dataset_info['AcquisitionDate'])
    - pd.to_datetime(dataset_info['PatientBirthDate'])
)

# Whole days divided by 365: the same year approximation as before.
dataset_info['AGE'] = _time_since_birth.dt.days / 365

# right=False makes every band half-open, [lower, upper), which reproduces
# the original `>= lower` / `< upper` comparisons. The open-ended first band
# also keeps negative ages in '0-1m', as the first `if` branch did.
# astype(object) keeps the column as plain strings rather than a Categorical.
dataset_info['AgeCategory'] = pd.cut(
    dataset_info['AGE'],
    bins=_AGE_BIN_EDGES,
    labels=_AGE_LABELS,
    right=False,
).astype(object)
|
|
38 |
|
|
|
39 |
# y = [dataset_info[dataset_info['AGE'] < 1/12].__len__(),
#      dataset_info[(dataset_info['AGE'] < 0.25) & (dataset_info['AGE'] >= 1/12)].__len__(),
#      dataset_info[(dataset_info['AGE'] < 0.5) & (dataset_info['AGE'] >= 0.25)].__len__(),
#      dataset_info[(dataset_info['AGE'] < 0.75) & (dataset_info['AGE'] >= 0.5)].__len__(),
#      dataset_info[(dataset_info['AGE'] < 1) & (dataset_info['AGE'] >= 0.75)].__len__(),
#      dataset_info[(dataset_info['AGE'] < 2) & (dataset_info['AGE'] >= 1)].__len__(),
#      dataset_info[(dataset_info['AGE'] < 3) & (dataset_info['AGE'] >= 2)].__len__(),
#      dataset_info[(dataset_info['AGE'] < 5) & (dataset_info['AGE'] >= 3)].__len__(),
#      dataset_info[(dataset_info['AGE'] < 9) & (dataset_info['AGE'] >= 5)].__len__(),
#      dataset_info[(dataset_info['AGE'] < 21) & (dataset_info['AGE'] >= 9)].__len__(),
#      dataset_info[(dataset_info['AGE'] >= 21)].__len__()]
|
|
50 |
|
|
|
51 |
# Number of rows whose value equals 1 in each column listed in nan_cols,
# in the same order as nan_cols.
# Built in a single pass instead of re-allocating with np.append on every
# iteration; dtype=float keeps the float64 array the np.append version made.
y = np.array(
    [(dataset_info[col] == 1).sum() for col in nan_cols],
    dtype=float,
)
|
|
54 |
|
|
|
55 |
# Grouped bar chart: number of rows per age band, split by PatientSex.
age_band_order = [
    '0-1m', '1-3m', '3-6m', '6-9m', '9-12m',
    '1-2y', '2-3y', '3-5y', '5-9y', '9-21y', '21y+',
]

plt.figure(figsize=(8, 4.8))

# countplot draws on the current axes and returns them, so the labelling
# below targets the same axes as the pyplot-level calls would.
age_axes = sns.countplot(
    data=dataset_info,
    x='AgeCategory',
    hue='PatientSex',
    order=age_band_order,
)
age_axes.set_xlabel('Patient age')
age_axes.set_ylabel('Count')
age_axes.set_title('Pre-processed ImageCHD patient distribution')
age_axes.legend(title='Patient sex')

# plt.savefig('C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\Graphs\\pre-proc_patient_distribution.png')
plt.show()