GDL-CT-Segmentation / Git / [b52eda] /Create

Models:
tobiasharvey/
GDL-CT-Segmentation
Downloads: 1
[b52eda]: / Create_CSV.py
History
Download this file
193 lines (159 with data), 5.6 kB

import pandas as pd
import nibabel as nib
from random import sample
from tqdm import tqdm
from Excel_Processing import ProcessSpreadsheets

# Locations of the two ImageCHD spreadsheets handed to ProcessSpreadsheets.
DATASET_INFO_PATH = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\imageCHD_dataset_info.xlsx"
SCAN_INFO_PATH = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\imagechd_dataset_image_info.xlsx"

# ProcessSpreadsheets lives in Excel_Processing; its result is used below as
# a pandas DataFrame (to_csv / drop / iterrows).
dataset_info = ProcessSpreadsheets(DATASET_INFO_PATH, SCAN_INFO_PATH)

try:
    # mode='x' is exclusive creation: an existing CSV is never overwritten.
    dataset_info.to_csv(path_or_buf = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\patient_info.csv",
                        index = False, mode = 'x')
except FileExistsError:
    # Only this case really means "already present".
    print('Patient CSV already present. Moving on.')
except OSError as err:
    # Missing folder, permissions, full disk, ...: report the real cause
    # instead of claiming the file already exists, then carry on as before.
    print('Patient CSV could not be saved:', err)
else:
    print('Patient CSV saved.')

# Drop the columns that are not carried into the per-slice tables.
dataset_info = dataset_info.drop(['ASD', 'VSD', 'AVSD', 'ToF', 'TGA', 'CA',
                                  'PA', 'PDA', 'COUNT', 'PatientSex',
                                  'PatientBirthDate', 'AcquisitionDate'],
                                  axis = 1)

# Read dim[3] of every scan's NIfTI header (used below as the number of
# axial slices). The scans are visited in the SAME order as the rows of
# dataset_info: the resulting list is assigned positionally, so iterating
# a sorted copy of the ids would attach counts to the wrong patients
# whenever the table is not already sorted by 'index'.
_scan_prefix = "C:\\Users\\leotu\\Downloads\\ImageCHD_dataset\\ImageCHD_dataset\\ct_"
axial_count = [
    # int() turns the header's array scalar into a plain Python int.
    int(nib.load(_scan_prefix + str(scan_id) + "_image.nii.gz").header['dim'][3])
    for scan_id in dataset_info['index']
]
dataset_info['Axial_count'] = axial_count
# Per-slice records accumulated over all patients, one list per split.
train = []
evaluation = []
test = []


def _split_indices(count, train_frac = 0.7, eval_frac = 0.2):
    """Randomly partition the positions 0..count-1 into train/eval/test.

    The positions are shuffled once and cut into consecutive chunks, which
    gives the same split sizes as drawing round(count * train_frac) items
    and then round(count * eval_frac) of the remainder, without having to
    compare every record against every sampled record.

    Returns a tuple (train_idx, eval_idx, test_idx). The train and eval
    positions are in random order; the test positions are sorted so the
    leftover slices keep their original order.
    """
    order = sample(range(count), k = count)
    n_train = round(count * train_frac)
    n_eval = round(count * eval_frac)
    return (order[:n_train],
            order[n_train:n_train + n_eval],
            sorted(order[n_train + n_eval:]))


def _plane_records(base, plane, count, adjacency):
    """Build one record per slice of a single plane for one patient.

    base      -- the patient's row with 'Adjacency_count' already present
    plane     -- value stored in 'Type' ('S', 'C' or 'A')
    count     -- number of slices, i.e. 'Indice' runs over 0..count-1
    adjacency -- value stored in 'Adjacency_count'

    Returns a list of independent Series copies, so later edits to one
    record cannot leak into another.
    """
    template = base.copy(deep = True)
    template['Adjacency_count'] = adjacency
    template['Type'] = plane
    records = []
    for i in range(count):
        template['Indice'] = i
        records.append(template.copy(deep = True))
    return records


# total= lets tqdm show a real progress bar; iterrows() has no length.
for _, row in tqdm(dataset_info.iterrows(), total = len(dataset_info)):
    base = row.rename({'Axial_count': 'Adjacency_count'})
    axial_slices = base['Adjacency_count']
    # range() needs a true integer even if the row was upcast by iterrows().
    n_axial = int(axial_slices)

    # (Type, number of slices, Adjacency_count). Sagittal and coronal use a
    # fixed 512 slices and keep the axial count as adjacency; axial slices
    # use the scan's own count and 512 as adjacency.
    # NOTE(review): 512 presumably is the in-plane size of every scan -- confirm.
    planes = (
        ('S', 512, axial_slices),
        ('C', 512, axial_slices),
        ('A', n_axial, 512),
    )

    for plane, count, adjacency in planes:
        records = _plane_records(base, plane, count, adjacency)
        train_idx, eval_idx, test_idx = _split_indices(count)
        train.extend(records[i] for i in train_idx)
        evaluation.extend(records[i] for i in eval_idx)
        test.extend(records[i] for i in test_idx)

# Turn each list of per-slice records into a table with a fresh 0..n-1 index.
# drop=True discards the old row labels directly, so this also works when a
# split is empty or the records carry no 'index' column.
train_dataset = pd.DataFrame(train).reset_index(drop = True)
eval_dataset = pd.DataFrame(evaluation).reset_index(drop = True)
test_dataset = pd.DataFrame(test).reset_index(drop = True)

print("Train: ", str(len(train_dataset)))
print("Eval: ", str(len(eval_dataset)))
print("Test: ", str(len(test_dataset)))

# Write the three splits. mode='x' is exclusive creation, so an existing CSV
# is left untouched; any other I/O failure is reported as such and the
# remaining splits are still attempted.
for _label, _frame, _csv_path in (
    ('Train', train_dataset,
     "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\train_dataset_info.csv"),
    ('Eval', eval_dataset,
     "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\eval_dataset_info.csv"),
    ('Test', test_dataset,
     "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\test_dataset_info.csv"),
):
    try:
        _frame.to_csv(path_or_buf = _csv_path, index = False, mode = 'x')
    except FileExistsError:
        print(_label + ' dataset CSV already present.')
    except OSError as err:
        print(_label + ' dataset CSV could not be saved:', err)
    else:
        print(_label + ' dataset CSV saved.')