"""Build train / eval / test CSV manifests of 2-D slices from the ImageCHD CT set.

For every patient volume, one manifest row is emitted per sagittal (512),
coronal (512) and axial (header-dependent count) slice.  Each orientation is
then randomly split 70% / 20% / 10% into train / eval / test, and the three
manifests plus the raw patient table are written as CSVs (skipped when the
file already exists).
"""

import random

import nibabel as nib
import pandas as pd
from tqdm import tqdm

from Excel_Processing import ProcessSpreadsheets

DATASET_INFO_PATH = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\imageCHD_dataset_info.xlsx"
SCAN_INFO_PATH = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\imagechd_dataset_image_info.xlsx"
OUTPUT_DIR = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\"
NIFTI_DIR = "C:\\Users\\leotu\\Downloads\\ImageCHD_dataset\\ImageCHD_dataset\\"

# Every scan is 512x512 in-plane; this is both the sagittal/coronal slice
# count and the adjacency count recorded for axial slices.
PLANE_SIZE = 512
TRAIN_FRACTION = 0.7
EVAL_FRACTION = 0.2


def _save_csv(frame, filename, saved_msg, exists_msg):
    """Write *frame* to OUTPUT_DIR/filename with mode='x' (create-only).

    mode='x' raises FileExistsError when the file is already present; only
    that specific error is treated as "skip" — anything else (bad path,
    permissions) propagates instead of being silently swallowed.
    """
    try:
        frame.to_csv(path_or_buf=OUTPUT_DIR + filename, index=False, mode='x')
        print(saved_msg)
    except FileExistsError:
        print(exists_msg)


def _slice_rows(base_row, slice_type, count, adjacency):
    """Return one manifest row (a deep-copied Series) per slice in [0, count)."""
    rows = []
    for idx in range(count):
        row = base_row.copy(deep=True)
        row['Type'] = slice_type
        row['Indice'] = idx
        row['Adjacency_count'] = adjacency
        rows.append(row)
    return rows


def _split(rows):
    """Randomly partition *rows* into (train, eval, test) at 70/20/10.

    Uses one random permutation of indices — O(n) — instead of repeated
    sampling followed by pairwise Series.equals filtering (O(n^2)).
    Counts match the original: round(n*0.7) train, round(n*0.2) eval,
    remainder test.
    """
    n = len(rows)
    order = random.sample(range(n), n)  # random permutation of 0..n-1
    n_train = round(n * TRAIN_FRACTION)
    n_eval = round(n * EVAL_FRACTION)
    train = [rows[i] for i in order[:n_train]]
    evaluation = [rows[i] for i in order[n_train:n_train + n_eval]]
    test = [rows[i] for i in order[n_train + n_eval:]]
    return train, evaluation, test


def main():
    dataset_info = ProcessSpreadsheets(DATASET_INFO_PATH, SCAN_INFO_PATH)
    _save_csv(dataset_info, "patient_info.csv",
              'Patient CSV saved.', 'Patient CSV already present. Moving on.')

    # Diagnosis / demographic columns are not needed in the slice manifests.
    dataset_info = dataset_info.drop(['ASD', 'VSD', 'AVSD', 'ToF', 'TGA', 'CA',
                                      'PA', 'PDA', 'COUNT', 'PatientSex',
                                      'PatientBirthDate', 'AcquisitionDate'],
                                     axis=1)

    # Axial slice count per volume, read from each NIfTI header
    # (dim[3] is the third spatial dimension).
    dataset_info['Axial_count'] = [
        nib.load(NIFTI_DIR + "ct_" + str(x) + "_image.nii.gz").header['dim'][3]
        for x in dataset_info['index'].sort_values()
    ]

    train = []
    evaluation = []
    test = []

    for _, row in tqdm(dataset_info.iterrows()):
        base = row.copy(deep=True)
        ax_count = base['Axial_count']
        # The per-row field becomes Adjacency_count: for sagittal/coronal
        # slices it is the axial count; for axial slices it is 512.
        base = base.rename({'Axial_count': 'Adjacency_count'})

        orientations = [
            _slice_rows(base, 'S', PLANE_SIZE, ax_count),
            _slice_rows(base, 'C', PLANE_SIZE, ax_count),
            _slice_rows(base, 'A', ax_count, PLANE_SIZE),
        ]
        for rows in orientations:
            tr, ev, te = _split(rows)
            train.extend(tr)
            evaluation.extend(ev)
            test.extend(te)

    # drop=True discards the duplicated patient index carried by each Series
    # (equivalent to the old reset_index().drop('level_0', ...) dance).
    train_dataset = pd.DataFrame(train).reset_index(drop=True)
    eval_dataset = pd.DataFrame(evaluation).reset_index(drop=True)
    test_dataset = pd.DataFrame(test).reset_index(drop=True)

    print("Train: ", str(len(train_dataset)))
    print("Eval: ", str(len(eval_dataset)))
    print("Test: ", str(len(test_dataset)))

    _save_csv(train_dataset, "train_dataset_info.csv",
              'Train dataset CSV saved.', 'Train dataset CSV already present.')
    _save_csv(eval_dataset, "eval_dataset_info.csv",
              'Eval dataset CSV saved.', 'Eval dataset CSV already present.')
    _save_csv(test_dataset, "test_dataset_info.csv",
              'Test dataset CSV saved.', 'Test dataset CSV already present.')


if __name__ == "__main__":
    main()