import pandas as pd
import nibabel as nib
from random import sample
from tqdm import tqdm
from Excel_Processing import ProcessSpreadsheets
# Absolute paths of the two ImageCHD Excel workbooks handed to ProcessSpreadsheets.
DATASET_INFO_PATH = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\imageCHD_dataset_info.xlsx"
SCAN_INFO_PATH = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\imagechd_dataset_image_info.xlsx"
# ProcessSpreadsheets is imported from the local Excel_Processing module. The
# rest of this script uses its result as a pandas-style table (to_csv, drop,
# iterrows) that carries an 'index' column identifying each scan file.
dataset_info = ProcessSpreadsheets(DATASET_INFO_PATH, SCAN_INFO_PATH)
# Export the patient table once. mode='x' opens the target for exclusive
# creation, so an export from a previous run is never overwritten.
try:
    dataset_info.to_csv(path_or_buf="C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\patient_info.csv",
                        index=False, mode='x')
except FileExistsError:
    # The only situation the original message actually describes.
    print('Patient CSV already present. Moving on.')
except OSError as err:
    # Any other I/O failure (missing folder, permissions, ...) is reported as
    # such instead of being mislabelled as "already present". The script still
    # continues, as before.
    print('Patient CSV could not be saved: ' + str(err))
else:
    print('Patient CSV saved.')
# Discard the columns that are not carried into the per-slice split tables.
dataset_info = dataset_info.drop(['ASD', 'VSD', 'AVSD', 'ToF', 'TGA', 'CA',
                                  'PA', 'PDA', 'COUNT', 'PatientSex',
                                  'PatientBirthDate', 'AcquisitionDate'],
                                 axis=1)

# Read header['dim'][3] from each scan's NIfTI header; this script treats that
# value as the number of axial slices of the volume.
# The scans are visited in the table's own row order. The resulting list is
# assigned positionally below, so iterating in any other order (e.g. sorted by
# 'index') would attach a count to the wrong row whenever the table is not
# already sorted.
axial_count = [
    nib.load("C:\\Users\\leotu\\Downloads\\ImageCHD_dataset\\ImageCHD_dataset\\ct_"
             + str(x) + "_image.nii.gz").header['dim'][3]
    for x in dataset_info['index']
]
dataset_info['Axial_count'] = axial_count
# Number of sagittal and of coronal slice indices generated per scan. The value
# is fixed for every scan (it is not read from the image header); it is also
# stored as the 'Adjacency_count' of axial rows.
IN_PLANE_SIZE = 512
# Share of each view's slices that goes to the training / evaluation split.
# Whatever is left after both draws becomes the test split.
TRAIN_FRACTION = 0.7
EVAL_FRACTION = 0.2


def _slice_rows(template, view_type, n_slices):
    """Return one deep copy of *template* per slice index.

    Each copy gets 'Type' set to *view_type* and 'Indice' set to the slice
    index (0 .. n_slices - 1).
    """
    rows = []
    for slice_idx in range(n_slices):
        entry = template.copy(deep=True)
        entry['Type'] = view_type
        entry['Indice'] = slice_idx
        rows.append(entry)
    return rows


def _draw(pool, k):
    """Randomly pick *k* items of *pool* without replacement.

    Returns ``(picked, leftover)``: *picked* is in draw order, *leftover* keeps
    the original order of *pool*. Sampling positions instead of the rows
    themselves avoids comparing every row against every sampled row, while
    choosing the same positions ``sample(pool, k)`` would.
    """
    drawn_positions = sample(range(len(pool)), k)
    drawn = set(drawn_positions)
    picked = [pool[pos] for pos in drawn_positions]
    leftover = [item for pos, item in enumerate(pool) if pos not in drawn]
    return picked, leftover


train = []
evaluation = []
test = []

for index, row in tqdm(dataset_info.iterrows(), total=len(dataset_info)):
    base = row.copy(deep=True)
    # int() so that range() and sample() receive a plain integer even when the
    # stored value is a NumPy scalar.
    ax_c = int(base['Axial_count'])
    # Sagittal and coronal rows keep the scan's axial count under the new
    # name; axial rows store the in-plane size there instead.
    base = base.rename({'Axial_count': 'Adjacency_count'})
    axial_base = base.copy(deep=True)
    axial_base['Adjacency_count'] = IN_PLANE_SIZE

    # One pool of candidate rows per view, in the order S, C, A.
    pools = [
        _slice_rows(base, 'S', IN_PLANE_SIZE),
        _slice_rows(base, 'C', IN_PLANE_SIZE),
        _slice_rows(axial_base, 'A', ax_c),
    ]
    full_sizes = [IN_PLANE_SIZE, IN_PLANE_SIZE, ax_c]

    # First draw the training rows for all three views, then the evaluation
    # rows from what remains. Both sample sizes are computed from the view's
    # full size, not from the remaining pool.
    for fraction, destination in ((TRAIN_FRACTION, train),
                                  (EVAL_FRACTION, evaluation)):
        for pos, full_size in enumerate(full_sizes):
            picked, pools[pos] = _draw(pools[pos], round(full_size * fraction))
            destination.extend(picked)

    # Everything not drawn for training or evaluation is test data.
    for leftover in pools:
        test.extend(leftover)
# Turn each list of per-slice Series into a table. The Series carry their
# source row label as name, so the raw frame index repeats per scan; it is
# replaced by a fresh 0..n-1 index. drop=True discards the old index directly
# instead of relying on the name pandas gives the column it would otherwise be
# moved into, and it also works when a split is empty.
train_dataset = pd.DataFrame(train).reset_index(drop=True)
eval_dataset = pd.DataFrame(evaluation).reset_index(drop=True)
test_dataset = pd.DataFrame(test).reset_index(drop=True)

# Report the number of slice rows in each split.
print("Train: ", str(len(train_dataset)))
print("Eval: ", str(len(eval_dataset)))
print("Test: ", str(len(test_dataset)))
def _write_csv_once(frame, path, label):
    """Write *frame* to *path* as CSV unless the file already exists.

    mode='x' opens the target for exclusive creation, so an earlier export is
    never overwritten. *label* only prefixes the status message. No exception
    from the file system is propagated; the outcome is printed instead.
    """
    try:
        frame.to_csv(path_or_buf=path, index=False, mode='x')
    except FileExistsError:
        print(label + ' CSV already present.')
    except OSError as err:
        # Distinguish real I/O failures from the harmless "already exported".
        print(label + ' CSV could not be saved: ' + str(err))
    else:
        print(label + ' CSV saved.')


_write_csv_once(train_dataset,
                "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\train_dataset_info.csv",
                'Train dataset')
_write_csv_once(eval_dataset,
                "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\eval_dataset_info.csv",
                'Eval dataset')
_write_csv_once(test_dataset,
                "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\test_dataset_info.csv",
                'Test dataset')