Create_CSV.py
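"""
Builds slice-level train/evaluation/test index CSVs for the ImageCHD dataset.

The merged spreadsheet information is saved as a patient CSV, every CT volume is
expanded into one row per sagittal, coronal and axial slice, each orientation is
split roughly 70/20/10, and the resulting splits are written to three CSV files.
"""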
import pandas as pd
import nibabel as nib
from random import sample
from tqdm import tqdm
from Excel_Processing import ProcessSpreadsheets
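# The ImageCHD spreadsheets (dataset-level info and per-scan image info) are combined
# by ProcessSpreadsheets into a single DataFrame with one row per CT scan.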
DATASET_INFO_PATH = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\imageCHD_dataset_info.xlsx"
SCAN_INFO_PATH = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\imagechd_dataset_image_info.xlsx"

dataset_info = ProcessSpreadsheets(DATASET_INFO_PATH, SCAN_INFO_PATH)
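# Write the merged table once; mode='x' raises FileExistsError if the CSV already exists.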
try:
    dataset_info.to_csv(path_or_buf = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\patient_info.csv",
                        index = False, mode = 'x')
    print('Patient CSV saved.')
except FileExistsError:
    print('Patient CSV already present. Moving on.')
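# Keep only the columns needed to build the slice index; the CHD diagnosis labels
# and patient/scan metadata columns are dropped.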
dataset_info = dataset_info.drop(['ASD', 'VSD', 'AVSD', 'ToF', 'TGA', 'CA',
                                  'PA', 'PDA', 'COUNT', 'PatientSex',
                                  'PatientBirthDate', 'AcquisitionDate'],
                                 axis = 1)
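# Number of axial slices per volume, read from each NIfTI header (dim[3]). The list is
# built in ascending 'index' order, which assumes dataset_info rows follow the same order.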
axial_count = [nib.load("C:\\Users\\leotu\\Downloads\\ImageCHD_dataset\\ImageCHD_dataset\\ct_"
                        + str(x) + "_image.nii.gz")
               .header['dim'][3] for x in dataset_info['index'].sort_values()]
dataset_info['Axial_count'] = axial_count

train = list()
evaluation = list()
test = list()
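# For every scan, build one row per slice: 512 sagittal (S) and 512 coronal (C) rows
# (the in-plane size is assumed to be 512x512) plus one axial (A) row per axial slice.
# Each orientation is then split roughly 70/20/10 into train, evaluation and test.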
for index, row in tqdm(dataset_info.iterrows()):
    temp = row.copy(deep = True)
    ax_c = temp['Axial_count']
    temp = temp.rename({'Axial_count': 'Adjacency_count'})

    temp_sagittal = list()
    temp_coronal = list()
    temp_axial = list()

    for i in range(0, 512):
        temp['Type'] = 'S'
        temp['Indice'] = i
        temp_sagittal.append(temp.copy(deep = True))

    for i in range(0, 512):
        temp['Type'] = 'C'
        temp['Indice'] = i
        temp_coronal.append(temp.copy(deep = True))

    for i in range(0, ax_c):
        temp['Type'] = 'A'
        temp['Indice'] = i
        temp['Adjacency_count'] = 512
        temp_axial.append(temp.copy(deep = True))
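    # Training split: draw 70% of each orientation at random, then remove the drawn rows
    # from the pool (compared with Series.equals) so later draws cannot pick them again.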
    new_train = sample(temp_sagittal, k = round(512*0.7))
    tmp = list()

    good = False
    for sag in temp_sagittal:
        good = True
        for tr in new_train:
            if sag.equals(tr):
                good = False
                break
        if good:
            tmp.append(sag)

    temp_sagittal = tmp.copy()
    for tr in new_train:
        train.append(tr)

    new_train = sample(temp_coronal, k = round(512*0.7))
    tmp = list()

    good = False
    for cor in temp_coronal:
        good = True
        for tr in new_train:
            if cor.equals(tr):
                good = False
                break
        if good:
            tmp.append(cor)

    temp_coronal = tmp.copy()
    for tr in new_train:
        train.append(tr)

    new_train = sample(temp_axial, k = round(ax_c*0.7))
    tmp = list()

    good = False
    for ax in temp_axial:
        good = True
        for tr in new_train:
            if ax.equals(tr):
                good = False
                break
        if good:
            tmp.append(ax)

    temp_axial = tmp.copy()
    for tr in new_train:
        train.append(tr)
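    # Evaluation split: draw 20% of each orientation's original slice count from the rows that remain.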
    new_eval = sample(temp_sagittal, k = round(512*0.2))
    tmp = list()

    good = False
    for sag in temp_sagittal:
        good = True
        for ev in new_eval:
            if sag.equals(ev):
                good = False
                break
        if good:
            tmp.append(sag)

    temp_sagittal = tmp.copy()
    for ev in new_eval:
        evaluation.append(ev)

    new_eval = sample(temp_coronal, k = round(512*0.2))
    tmp = list()

    good = False
    for cor in temp_coronal:
        good = True
        for ev in new_eval:
            if cor.equals(ev):
                good = False
                break
        if good:
            tmp.append(cor)

    temp_coronal = tmp.copy()
    for ev in new_eval:
        evaluation.append(ev)

    new_eval = sample(temp_axial, k = round(ax_c*0.2))
    tmp = list()

    good = False
    for ax in temp_axial:
        good = True
        for ev in new_eval:
            if ax.equals(ev):
                good = False
                break
        if good:
            tmp.append(ax)

    temp_axial = tmp.copy()
    for ev in new_eval:
        evaluation.append(ev)
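    # Whatever remains after the train and evaluation draws (roughly 10%) becomes the test split.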
    for te in temp_sagittal:
        test.append(te)
    for te in temp_coronal:
        test.append(te)
    for te in temp_axial:
        test.append(te)
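# Each list of Series becomes a DataFrame; reset_index() re-inserts the old row labels as a
# column named 'level_0' (an 'index' column already exists), which is then dropped.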
train_dataset = pd.DataFrame(train).reset_index().drop('level_0', axis = 1)
eval_dataset = pd.DataFrame(evaluation).reset_index().drop('level_0', axis = 1)
test_dataset = pd.DataFrame(test).reset_index().drop('level_0', axis = 1)

print("Train: ", len(train_dataset))
print("Eval: ", len(eval_dataset))
print("Test: ", len(test_dataset))
try:
    train_dataset.to_csv(path_or_buf = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\train_dataset_info.csv",
                         index = False, mode = 'x')
    print('Train dataset CSV saved.')
except FileExistsError:
    print('Train dataset CSV already present.')

try:
    eval_dataset.to_csv(path_or_buf = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\eval_dataset_info.csv",
                        index = False, mode = 'x')
    print('Eval dataset CSV saved.')
except FileExistsError:
    print('Eval dataset CSV already present.')

try:
    test_dataset.to_csv(path_or_buf = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\test_dataset_info.csv",
                        index = False, mode = 'x')
    print('Test dataset CSV saved.')
except FileExistsError:
    print('Test dataset CSV already present.')