Switch to unified view

a b/Preprocessing Medical Data Pipeline/preprocessing.py
1
#Libraries
2
import numpy as np
3
import os
4
import pydicom
5
import SimpleITK as sitk
6
import pandas as pd
7
import trimesh
8
from pyntcloud import PyntCloud
9
from skimage.transform import resize
10
import sys
11
import matplotlib.pyplot as plt
12
from tqdm import tqdm
13
import nibabel as nib
14
import re
15
import cv2
16
from PIL import Image
17
18
# Helper Function
19
def flatten_3d_to_2d(array_3d):
20
    # Get the dimensions of the 3D array
21
    depth, height, width = array_3d.shape
22
    
23
    # Reshape the 3D array to a 2D array
24
    array_2d = np.reshape(array_3d, (depth, height * width))
25
    
26
    return array_2d
27
28
def transform_string(input_string):
29
    parts = input_string.split('_')
30
    
31
    if len(parts) < 2:
32
        return None
33
    
34
    prefix = parts[-2].upper()
35
    
36
    try:
37
        number_part = int(parts[0])
38
    except ValueError:
39
        return None
40
    
41
    new_string = f'{prefix}_{number_part:03d}'
42
    return new_string
43
44
# Helper Function
45
def flatten_2d_array(arr):
46
    flattened = []
47
    for row in arr:
48
        flattened.extend(row)
49
    return flattened
50
# Helper Function
51
# Read in entire scan of single patient
52
# folders = [f for f in os.listdir('MRI Scans - Tairawhiti') if os.path.isdir(os.path.join('MRI Scans - Tairawhiti', f))]
53
def ListFolders(directory):
54
    folder_names = []
55
    for root, dirs, files in os.walk(directory):
56
        for folder in dirs:
57
            folder_names.append(folder)
58
    return folder_names
59
# Helper Function
60
def read_dicom_files(directory):
61
    dicom_files = []
62
    files = os.listdir(directory)
63
    sorted_files = sorted(files)
64
    # print(sorted_files)
65
    for filename in sorted_files:
66
        filepath = os.path.join(directory, filename)
67
        if os.path.isfile(filepath) and filename.endswith('.dcm'):
68
            try:
69
                dicom_file = pydicom.dcmread(filepath)
70
                dicom_files.append(dicom_file)
71
            except pydicom.errors.InvalidDicomError:
72
                print(f"Skipping file: {filename}. It is not a valid DICOM file.")
73
    return dicom_files
74
# Helper Function
75
def get_ram_usage(variable, variable_name):
76
    size_in_bytes = sys.getsizeof(variable)
77
    size_in_kb = size_in_bytes / 1024
78
    size_in_mb = size_in_kb / 1024
79
    size_in_gb = size_in_mb / 1024
80
    message = "Memory usage of %s: %d %s." % (variable_name, size_in_mb, 'MB')
81
    print(message)
82
#Helper Function
83
def convert_4d_to_3d(array_4d, axis):
84
    array_3d = np.squeeze(array_4d, axis=axis)
85
    return array_3d
86
def plot_3d_data(x, y, z):
87
    # Define the size of the figure (width, height) in inches
88
    fig = plt.figure(figsize=(4, 4))
89
    # Plot the 3D scatter plot
90
    ax = fig.add_subplot(111, projection='3d')
91
    ax.scatter(x, y, z, c='blue')
92
    ax.set_xlabel('X')
93
    ax.set_ylabel('Y')
94
    ax.set_zlabel('Z')
95
    plt.show()
96
def superimpose_images(image1, image2):
97
    image1 = image1 / np.max(image1)
98
    image2 = image2 / np.max(image2)
99
    alpha = 0.5
100
    superimposed_image = alpha * image1 + (1 - alpha) * image2
101
    return superimposed_image
102
103
104
def ReadIn_MRIScans_Masks(scans_path, folders):
105
    print('Patient Scan Data: ', folders)
106
    scan_pixel_data = []
107
    scan_coordinate_data = []
108
    single_scan_pixel_data = []
109
    scan_coordinate_data = []
110
    scan_orientation_data = []
111
    scan_pixelspacing_data = []
112
113
    single_paitent_scans_path =  scans_path + '/Raw DICOM MRI Scans/{}'.format(folders)
114
    dicom_files = read_dicom_files(single_paitent_scans_path)
115
116
    # Extracting pixel data
117
    for i in range (len(dicom_files)):
118
        normalized_data = (dicom_files[i].pixel_array)/(np.max(dicom_files[i].pixel_array))
119
        single_scan_pixel_data.append(normalized_data)
120
        print("Max pixel value in image stack is: ", np.max(normalized_data))
121
    scan_pixel_data.append(single_scan_pixel_data)
122
123
    training_scans = flatten_2d_array(scan_pixel_data)
124
    training_scans = np.array(training_scans)
125
    print("Max pixel value in image stack is: ", training_scans.max())
126
    
127
    # Coordinate Data
128
    single_paitent_scans_path =  scans_path + '/{}'.format(folders)
129
    for i in range (len(dicom_files)):
130
        scan_coordinate_data.append(dicom_files[i].ImagePositionPatient)
131
        scan_orientation_data.append(dicom_files[i].ImageOrientationPatient)
132
        scan_pixelspacing_data.append(dicom_files[i].PixelSpacing)
133
        
134
    coord_data = pd.DataFrame(scan_coordinate_data, columns=["x", "y", "z"])
135
    return training_scans
136
137
138
# Mapping coordinate data from groundtruth mask/label to mri training data
139
def MappingCoordinateData(filename_label, coord_data):
140
    # Load in mesh of label data
141
    mesh = trimesh.load_mesh(('C:/Users/GGPC/OneDrive/Desktop/Part 4 Project/Part4Project/SegmentationMasks/{}.ply').format(filename_label))
142
143
    # Convert the mesh vertices to a DataFrame
144
    vertices = pd.DataFrame(mesh.vertices, columns=["x", "y", "z"])
145
146
    # Pranav Ordering
147
    # coord_data = coord_data.sort_values('z', ascending = False)
148
    # coord_data = coord_data.reset_index(drop = True)
149
    vertices = vertices.sort_values('z', ascending = False)
150
    vertices = vertices.reset_index(drop = True)
151
152
    print('Height of Paitent in mm: ', np.abs(coord_data.iloc[-1][2] - coord_data.iloc[0][2]))
153
    print('Length of Paitent AOI (tibia) in mm: ', np.abs(vertices.iloc[-1][2] - vertices.iloc[0][2]))
154
    # vertices['z'] = vertices['z'].apply(lambda x: round(x, 1))
155
    coord_data['z'] = coord_data['z'].apply(lambda x: round(x, 1))
156
157
    # plot_3d_data(np.array(vertices['x']), np.array(vertices['y']), np.array(vertices['z']))
158
159
    if True:
160
        vertices.to_csv('ExactMaskCoordinateData.csv')
161
        coord_data.to_csv('ExactScanCoordinateData.csv')
162
        
163
    # vertices['z'] = np.round(vertices['z'] * 2) / 2
164
    # coord_data['z'] = np.round(coord_data['z'] * 2) / 2
165
    vertices['z'] = np.round(vertices['z'] * 2) / 2
166
    # coord_data['z'] = np.round(coord_data['z'] * 10) / 10
167
    # vertices['z'] = vertices['z'].apply(lambda x: round(x, 1))
168
    # coord_data['z'] = coord_data['z'].apply(lambda x: round(x, 1))
169
    if True:
170
        vertices.to_csv('RoundedMaskCoordinateData.csv')
171
172
    merged_df = pd.merge(coord_data, vertices, on='z')
173
    # condensed_df = merged_df.groupby('z').mean().reset_index()
174
    condensed_df = merged_df.groupby('z').median().reset_index()
175
176
    mapping_dict = dict(zip(condensed_df['z'], ['AOI']*len(condensed_df)))
177
178
    coord_data['SegmentationRegionSlice'] = coord_data['z'].map(mapping_dict).fillna('Outside of AOI')
179
180
    slices_aoi_start = (coord_data.loc[coord_data['SegmentationRegionSlice'] == 'AOI'].index)[0]
181
    slices_aoi_end = (coord_data.loc[coord_data['SegmentationRegionSlice'] == 'AOI'].index)[-1]
182
    slice_aoi_range = (slices_aoi_end - slices_aoi_start + 1)
183
    print('AOI Slice Start: ', slices_aoi_start)
184
    print('AOI Slice End: ', slices_aoi_end)
185
    print('AOI Slice Range: ', slice_aoi_range)
186
187
    # CSV Format 
188
    if True:
189
        coord_data.to_csv('tibia_mri_coord.csv')
190
        merged_df.to_csv('mergedcoordsystems.csv')
191
        condensed_df.to_csv('condensedmergedcoordsystems.csv')
192
    # print(coord_data)
193
194
    return slices_aoi_start, slices_aoi_end, slice_aoi_range, coord_data
195
196
197
198
def VoxelisationMask(filename_label, slice_aoi_range):
199
    # Load in mesh of label data
200
    mesh = trimesh.load_mesh(('C:/Users/GGPC/OneDrive/Desktop/Part 4 Project/Part4Project/SegmentationMasks/{}.ply').format(filename_label))
201
202
    # # Convert the mesh vertices to a DataFrame
203
    # vertices = pd.DataFrame(mesh.vertices, columns=["x", "y", "z"])
204
    # # Convert the mesh to a PyntCloud object
205
    # cloud = PyntCloud(vertices)
206
    vertices = pd.DataFrame(mesh.vertices, columns=["x", "y", "z"])
207
    faces = pd.DataFrame(mesh.faces, columns=['v1', 'v2', 'v3'])
208
    cloud = PyntCloud(points=vertices, mesh=faces)
209
210
    # Set the desired resolution
211
    desired_resolution = [slice_aoi_range, 512, 512]
212
213
    # Voxelize the mesh using the PyntCloud voxelization module
214
    voxelgrid_id = cloud.add_structure("voxelgrid", n_x=desired_resolution[0], n_y=desired_resolution[1], n_z=desired_resolution[2])
215
    voxel_grid = cloud.structures[voxelgrid_id].get_feature_vector().reshape(desired_resolution)
216
217
    # Transpose and swap axes to change the voxel grid orientation
218
    voxel_grid = np.transpose(voxel_grid, axes=(2, 0, 1))
219
220
    # Resize the voxel grid to match the desired dimensions
221
    voxel_grid = resize(voxel_grid, desired_resolution, anti_aliasing=False)
222
223
    voxel_grid = np.where(voxel_grid > 0, 1, 0)
224
225
    # print('Mask Slices Normalized to MRI Scans Shape (Purely AOI): ', voxel_grid.shape)
226
227
    return voxel_grid
228
229
230
231
def MaskCreation(basedir, filename_label):
232
    seg_masks_15A_tibia = sitk.ReadImage(('{}/Raw NIFITI Segmentation Masks (3D Slicer Output)/{}').format(basedir, filename_label))
233
    seg_masks_15A_tibia_data = sitk.GetArrayFromImage(seg_masks_15A_tibia)
234
    voxel_dimensions = seg_masks_15A_tibia.GetSpacing()
235
    voxel_dimensions = pd.DataFrame(data = voxel_dimensions)
236
237
    seg_masks_15A_tibia_data = seg_masks_15A_tibia_data[::-1, :, :]
238
    seg_masks_15A_tibia_data = np.where(seg_masks_15A_tibia_data != 0, 1, 0)
239
240
    seg_masks_15A_tibia_data = np.array(seg_masks_15A_tibia_data)
241
    indices = np.transpose(np.nonzero(seg_masks_15A_tibia_data != 0))
242
    if indices.size > 0:
243
        first_non_zero_index_2d = tuple(indices[0])
244
    else:
245
        first_non_zero_index_2d = None
246
247
    if indices.size > 0:
248
        last_non_zero_index_2d = tuple(indices[-1])
249
    else:
250
        last_non_zero_index_2d = None
251
252
    print("AOI Slice Start: ", first_non_zero_index_2d[0])
253
    print("AOI Slice End: ", last_non_zero_index_2d[0])
254
    print("AOI Slice Range: ", (last_non_zero_index_2d[0] - first_non_zero_index_2d[0] + 1))
255
256
    return seg_masks_15A_tibia_data, first_non_zero_index_2d[0], last_non_zero_index_2d[0]
257
258
def VisualValidationMSK(basedir, colab_fname, slice_idx_bin, slice_idx_multi, multiclass_dir, mask_index):
259
    nii_img_scan = nib.load(('{}/nnUNet Data/scans/msk_00{}.nii.gz').format(basedir, int(((colab_fname[0]).split('_'))[1])))
260
    nii_img_mask = nib.load(('{}/nnUNet Data/masks/{}/{}.nii.gz').format(basedir, ((colab_fname[0]).split('_'))[0],colab_fname[0]))
261
262
    mask_data = nii_img_mask.get_fdata()
263
    scan_data = nii_img_scan.get_fdata()
264
    image1 = scan_data[int(slice_idx_bin), :, :, 0]
265
    image2 = mask_data[int(slice_idx_bin), :, :, 0]
266
    superimposed_image = superimpose_images(image1, image2)
267
    plt.imshow(superimposed_image, cmap='gray')
268
    plt.title(('Validating Scan & Mask (Binary) on Slice {} of {}').format(slice_idx_bin, colab_fname[0]))
269
    plt.axis('off')
270
    plt.show()
271
272
    if (multiclass_dir != False):
273
        nii_img_mask_multiclass = nib.load(('{}/msk_00{}.nii.gz').format(multiclass_dir, mask_index))
274
        multiclass_mask_data = nii_img_mask_multiclass.get_fdata()
275
        image1 = scan_data[slice_idx_multi, :, :, 0]
276
        image2 = multiclass_mask_data[slice_idx_multi, :, :, 0]
277
        superimposed_image = superimpose_images(image1, image2)
278
        plt.imshow(superimposed_image, cmap='gray')
279
        plt.title(('Validating Scan & Mask (Multiclass) on Slice {}/{} of msk_00{}').format(slice_idx_multi, multiclass_mask_data.shape[0], mask_index))
280
        plt.axis('off')
281
        plt.show()
282
283
284
def preprocessing(scans_path, filename_labels, folders, total_slices_raw_data, DataOnlyAOI, Cropping):
285
    train_mask_tibia_labels, training_scans, start_slices_aoi, end_slices_aoi, slice_aoi_ranges  = [], [], [], [], []
286
    filename_labels = [filename_labels]
287
    print('\n')
288
    print('Patient Scan Data Folders Included in Run: ', folders)
289
290
    for index, filename_label in enumerate(filename_labels):
291
        print('\n')
292
        print('Segmentation Mask: ',('{}'.format(filename_label)))
293
294
        training_scan = ReadIn_MRIScans_Masks(scans_path, folders[index])
295
        seg_masks_15A_tibia_data, slices_aoi_start, slices_aoi_end = MaskCreation(scans_path, filename_label)
296
        median_aoi_index = int(np.abs(slices_aoi_end - slices_aoi_start) / 2) + slices_aoi_start
297
298
        if(DataOnlyAOI == True):
299
            if (index == 0):
300
                train_mask_tibia_labels = seg_masks_15A_tibia_data[(slices_aoi_start):(slices_aoi_end+1)]
301
                training_scans = training_scan[(slices_aoi_start):(slices_aoi_end+1)]
302
            else:
303
                train_mask_tibia_labels = np.concatenate((train_mask_tibia_labels, seg_masks_15A_tibia_data[(slices_aoi_start):(slices_aoi_end+1)]), axis=0)
304
                training_scans = np.concatenate((training_scans, training_scan[(slices_aoi_start):(slices_aoi_end+1)]), axis=0)
305
306
        if (DataOnlyAOI == False):
307
            train_mask_tibia_labels.append(seg_masks_15A_tibia_data)
308
            training_scans.append(training_scan)
309
310
        start_slices_aoi.append(slices_aoi_start)
311
        end_slices_aoi.append(slices_aoi_end)
312
313
        print('\n')
314
315
    train_mask_tibia_labels = np.array(train_mask_tibia_labels)
316
    training_scans = np.array(training_scans)
317
318
    # Normalization and Binarization
319
    training_scans = training_scans.astype('float32')
320
    training_scans /= 255.  # scale scans to [0, 1]
321
    train_mask_tibia_labels = np.where(train_mask_tibia_labels != 0, 1, 0)
322
    print("Scans Normalized! [0-1]")
323
    print("Max pixel value in image stack is: ", training_scans.max())
324
    print("Masks Binarised! [0,1]")
325
    print("Labels in the mask are : ", np.unique(train_mask_tibia_labels))
326
    print("\n")
327
328
    if (DataOnlyAOI == False):
329
        training_scans = np.reshape(training_scans, (len(folders) * training_scans.shape[1], 512, 512))
330
        train_mask_tibia_labels = np.reshape(train_mask_tibia_labels, (len(filename_labels) * train_mask_tibia_labels.shape[1], 512, 512))
331
        training_scans = np.expand_dims(training_scans, axis=-1)
332
        train_mask_tibia_labels = np.expand_dims(train_mask_tibia_labels, axis=-1)
333
334
    if (DataOnlyAOI == True):
335
        training_scans = np.expand_dims(training_scans, axis=-1)
336
        train_mask_tibia_labels = np.expand_dims(train_mask_tibia_labels, axis=-1)
337
338
339
    if (Cropping == True):
340
    # Resize 'training_scans'
341
        training_scans_resized = np.empty((training_scans.shape[0], 256, 256, 1), dtype=np.float32)
342
        for i in range(training_scans.shape[0]):
343
            input_image = training_scans[i, :, :, 0]  # Extract the 2D image from the 4D array
344
            pil_image = Image.fromarray(input_image)  # Convert to Pillow Image
345
            resized_image = pil_image.resize((256, 256), Image.BILINEAR)  # Resize using Pillow
346
            training_scans_resized[i, :, :, 0] = np.array(resized_image)  # Store the resized image in the output array
347
348
349
        # Resize 'train_mask_tibia_labels'
350
        train_mask_tibia_labels_resized = np.empty((train_mask_tibia_labels.shape[0], 256, 256, 1), dtype=np.uint8)
351
        for i in range(train_mask_tibia_labels.shape[0]):
352
            input_image = train_mask_tibia_labels[i, :, :, 0]  # Extract the 2D image from the 4D array
353
            pil_image = Image.fromarray(input_image)  # Convert to Pillow Image
354
            resized_image = pil_image.resize((256, 256), Image.BILINEAR)  # Resize using Pillow
355
            train_mask_tibia_labels_resized[i, :, :, 0] = np.array(resized_image)  # Store the resized image in the output array
356
357
        training_scans = training_scans_resized
358
        train_mask_tibia_labels = train_mask_tibia_labels_resized
359
360
    print('Training Scans Input Shape (Full 3D Stack): ', training_scans.shape)
361
    print('Training Masks Input Shape (Full 3D Stack): ', train_mask_tibia_labels.shape)
362
363
    return training_scans, train_mask_tibia_labels, median_aoi_index