Diff of /Create_CSV.py [000000] .. [b52eda]

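# Build slice-level train / eval / test CSV listings for the ImageCHD CT
# dataset from its Excel metadata and the NIfTI volume headers.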
import pandas as pd
import nibabel as nib
from random import sample
from tqdm import tqdm
from Excel_Processing import ProcessSpreadsheets

DATASET_INFO_PATH = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\imageCHD_dataset_info.xlsx"
SCAN_INFO_PATH = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\imagechd_dataset_image_info.xlsx"

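# ProcessSpreadsheets is assumed to merge the two spreadsheets into a single
# pandas DataFrame with one row per patient/scan.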
dataset_info = ProcessSpreadsheets(DATASET_INFO_PATH, SCAN_INFO_PATH)

# mode = 'x' fails if the file already exists, so a previously written CSV is left untouched.
try:
    dataset_info.to_csv(path_or_buf = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\patient_info.csv",
                        index = False, mode = 'x')
    print('Patient CSV saved.')
except FileExistsError:
    print('Patient CSV already present. Moving on.')

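# The per-diagnosis flags and acquisition metadata are not needed in the
# slice-level listings, so they are dropped before expanding the table per slice.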
dataset_info = dataset_info.drop(['ASD', 'VSD', 'AVSD', 'ToF', 'TGA', 'CA',
                                  'PA', 'PDA', 'COUNT', 'PatientSex',
                                  'PatientBirthDate', 'AcquisitionDate'],
                                  axis = 1)

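# Number of axial slices per scan, read from each volume's NIfTI header
# (header['dim'][3]); the sagittal and coronal counts are fixed at 512 below.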
axial_count = [nib.load("C:\\Users\\leotu\\Downloads\\ImageCHD_dataset\\ImageCHD_dataset\\ct_" \
                         + str(x) + "_image.nii.gz") \
               .header['dim'][3] for x in dataset_info['index'].sort_values()]
dataset_info['Axial_count'] = axial_count
train = list()
evaluation = list()
test = list()

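# For each patient, expand the row into one record per 2D slice and split the
# slices into train / eval / test (roughly 70 % / 20 % / 10 % per view).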
for index, row in tqdm(dataset_info.iterrows()):
    temp = row.copy(deep = True)
    ax_c = temp['Axial_count']
    temp = temp.rename({'Axial_count': 'Adjacency_count'})

    temp_sagittal = list()
    temp_coronal = list()
    temp_axial = list()

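    # Build one record per slice: 512 sagittal ('S'), 512 coronal ('C') and
    # ax_c axial ('A'); 'Indice' is the slice position within the volume.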
    for i in range(0, 512):
        temp['Type'] = 'S'
        temp['Indice'] = i
        temp_sagittal.append(temp.copy(deep = True))

    for i in range(0, 512):
        temp['Type'] = 'C'
        temp['Indice'] = i
        temp_coronal.append(temp.copy(deep = True))

    for i in range(0, ax_c):
        temp['Type'] = 'A'
        temp['Indice'] = i
        temp['Adjacency_count'] = 512
        temp_axial.append(temp.copy(deep = True))

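    # Sample ~70 % of each view's slices for training, then filter the sampled
    # records out of the per-view pool by pairwise Series.equals() comparison.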
    new_train = sample(temp_sagittal, k = round(512*0.7))
    tmp = list()

    good = False
    for sag in temp_sagittal:
        good = True
        for tr in new_train:
            if sag.equals(tr):
                good = False
                break
        if good:
            tmp.append(sag)

    temp_sagittal = tmp.copy()
    for tr in new_train:
        train.append(tr)

    new_train = sample(temp_coronal, k = round(512*0.7))
    tmp = list()

    good = False
    for cor in temp_coronal:
        good = True
        for tr in new_train:
            if cor.equals(tr):
                good = False
                break
        if good:
            tmp.append(cor)

    temp_coronal = tmp.copy()
    for tr in new_train:
        train.append(tr)

    new_train = sample(temp_axial, k = round(ax_c*0.7))
    tmp = list()

    good = False
    for ax in temp_axial:
        good = True
        for tr in new_train:
            if ax.equals(tr):
                good = False
                break
        if good:
            tmp.append(ax)

    temp_axial = tmp.copy()
    for tr in new_train:
        train.append(tr)

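    # From the remaining slices, draw ~20 % of the original per-view count for
    # evaluation; whatever is left afterwards goes to the test set.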
    new_eval = sample(temp_sagittal, k = round(512*0.2))
    tmp = list()

    good = False
    for sag in temp_sagittal:
        good = True
        for ev in new_eval:
            if sag.equals(ev):
                good = False
                break
        if good:
            tmp.append(sag)

    temp_sagittal = tmp.copy()
    for ev in new_eval:
        evaluation.append(ev)

    new_eval = sample(temp_coronal, k = round(512*0.2))
    tmp = list()

    good = False
    for cor in temp_coronal:
        good = True
        for ev in new_eval:
            if cor.equals(ev):
                good = False
                break
        if good:
            tmp.append(cor)

    temp_coronal = tmp.copy()
    for ev in new_eval:
        evaluation.append(ev)

    new_eval = sample(temp_axial, k = round(ax_c*0.2))
    tmp = list()

    good = False
    for ax in temp_axial:
        good = True
        for ev in new_eval:
            if ax.equals(ev):
                good = False
                break
        if good:
            tmp.append(ax)

    temp_axial = tmp.copy()
    for ev in new_eval:
        evaluation.append(ev)

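    # Everything not sampled for train or eval becomes the test set.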
    for te in temp_sagittal:
        test.append(te)
    for te in temp_coronal:
        test.append(te)
    for te in temp_axial:
        test.append(te)

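# Stack the per-slice Series into DataFrames. reset_index() names the new
# column 'level_0' because the table already has an 'index' column; drop it.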
train_dataset = pd.DataFrame(train).reset_index().drop('level_0', axis = 1)
eval_dataset = pd.DataFrame(evaluation).reset_index().drop('level_0', axis = 1)
test_dataset = pd.DataFrame(test).reset_index().drop('level_0', axis = 1)

print("Train: ", str(train_dataset.__len__()))
171
print("Eval: ", str(eval_dataset.__len__()))
172
print("Test: ", str(test_dataset.__len__()))
173
174
try:
    train_dataset.to_csv(path_or_buf = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\train_dataset_info.csv",
                         index = False, mode = 'x')
    print('Train dataset CSV saved.')
except FileExistsError:
    print('Train dataset CSV already present.')

try:
    eval_dataset.to_csv(path_or_buf = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\eval_dataset_info.csv",
                        index = False, mode = 'x')
    print('Eval dataset CSV saved.')
except FileExistsError:
    print('Eval dataset CSV already present.')

try:
    test_dataset.to_csv(path_or_buf = "C:\\Users\\leotu\\OneDrive\\Documents\\ImageCHD_dataset\\test_dataset_info.csv",
                        index = False, mode = 'x')
    print('Test dataset CSV saved.')
except FileExistsError:
    print('Test dataset CSV already present.')