a b/preprocess/preprocessingutils.py
1
import os
2
import numpy as np
3
import pandas as pd
4
import pylidc as pl
5
import pydicom
6
import pickle
7
from sklearn.preprocessing import PowerTransformer
8
9
def annotation_to_dict(ann):
10
    '''
11
    Flattens annotations into a single row pandas DataFrame
12
    '''
13
    
14
    d = {
15
        'patient_id': ann.scan.patient_id,
16
        'nodule_id_lidc': ann._nodule_id,
17
        'annotation_id_lidc': ann.id,
18
        'scan_id': ann.scan_id,
19
        'volume': ann.volume,
20
        'surface_area': ann.surface_area,
21
        'diameter': ann.diameter
22
    }
23
    feature_names = ['sublety', 'internalstructure', 'calcification',
24
               'sphericity', 'margin', 'lobulation', 'spiculation',
25
               'texture', 'malignancy']
26
    for feature, value in zip(feature_names, ann.feature_vals()):
27
        d[feature] = value
28
    return d
29
30
def annotation_to_df(ann):
31
    try:
32
        d = annotation_to_dict(ann)
33
        d = {k: [v] for k, v in d.items()}
34
        df = pd.DataFrame.from_dict(d)
35
    except:
36
        df = None
37
    return df
38
39
def annotation_list_to_df(anns):
40
    assert isinstance(anns, list)
41
    dfs = []
42
    for ann in anns:
43
        dfs.append(annotation_to_df(ann))
44
    df = pd.concat(dfs, ignore_index=True)
45
    df["annotation_idx"] = range(1, df.shape[0]+1)
46
    return df
47
48
49
def get_intercept_and_slope(scan):
50
    ''' 
51
    scan is the results of a pydicom query
52
    returns the intercept and slope
53
    adapted from https://www.kaggle.com/gzuidhof/full-preprocessing-tutorial
54
    '''
55
    dcm_path = scan.get_path_to_dicom_files()
56
    dcm_files = [x for x in os.listdir(dcm_path) if x.endswith(".dcm")]
57
    slice0 = pydicom.read_file(os.path.join(dcm_path, dcm_files[0]), stop_before_pixels=True)
58
    intercept = slice0.RescaleIntercept
59
    slope = slice0.RescaleSlope
60
    return intercept, slope
61
62
def resample_and_crop_annotation(ann_id, ann, nodule_path, mask_path=None, scan=None, size_mm = 50, export_mask = True):
63
    '''
64
    take an annotation, crop and resample
65
    size is the length of the sides of the resulting cube in millimeters
66
    '''
67
    if scan is None:
68
        scan = ann.scan
69
    intercept, slope = get_intercept_and_slope(scan)
70
    try:
71
        vol, mask = ann.uniform_cubic_resample(side_length = size_mm, verbose = True)
72
        if slope != 1:
73
            vol = slope * vol.astype(np.float64)
74
75
        vol = vol.astype(np.int16)
76
        vol += np.int16(intercept)
77
        
78
        np.save(os.path.join(nodule_path, ann_id+".npy"), vol)
79
        if export_mask:
80
            assert mask_path != None
81
            np.save(os.path.join(mask_path, ann_id+".npy"), mask)
82
        print("")
83
    except:
84
        print("-failed")
85
86
87
def crop_nodule_tight_z(ann, volume=None, scan=None, scan_spacing=None, out_size_cm = 5):
88
    """
89
    Get nodule cropped tightly in z direction, but of minimum dimension in xy plane
90
    """
91
    # print(f"trying to crop")
92
    if volume is None:
93
        if scan is None:
94
            scan = ann.scan
95
        volume = scan.to_volume()
96
    if scan_spacing is None:
97
        scan_spacing = scan.pixel_spacing
98
    # print(f"scan_spacing: {scan_spacing}")
99
    padding = get_padding_tight_z(ann, scan_spacing=scan_spacing, out_size_cm=out_size_cm)
100
    # print(f"padding: {padding}")
101
102
    mask = ann.boolean_mask(pad=padding)
103
    bbox = ann.bbox(pad=padding)
104
    zvals= ann.contour_slice_indices
105
    arr  = volume[bbox]
106
107
    return arr, mask, zvals
108
109
def get_padding_tight_z(ann, scan=None, scan_spacing=None, out_size_cm = None):
110
    """
111
    Get bbox dimensions base on a minimal size, restricting to no padding in z direction
112
    """
113
    if scan_spacing is None:
114
        if scan is None:
115
            scan_spacing = ann.scan.pixel_spacing
116
        else:
117
            scan_spacing = scan.pixel_spacing
118
    # return tight bounding box
119
    if out_size_cm is None:
120
        padding = [(int(0), int(0))] * 3
121
    else:
122
        # if len(out_size_cm == 3):
123
        #     if out_size_cm[2] is None:
124
        #         padding_z = (0,0)        
125
        out_shape = (np.ceil((out_size_cm * 10) / scan_spacing) * np.ones((2,))).astype(int)
126
127
        bb_mat   = ann.bbox_matrix()
128
        bb_shape = bb_mat[:,1] - bb_mat[:,0]
129
130
        paddings = out_shape - bb_shape[:2]
131
132
        # print(f"paddings: {paddings}")
133
134
        padding_x = (int(np.ceil(paddings[0] / 2)), int(np.floor(paddings[0] / 2)))
135
        padding_y = (int(np.ceil(paddings[1] / 2)), int(np.floor(paddings[1] / 2)))
136
137
        padding = [padding_x, padding_y, (int(0),int(0))]
138
139
    return padding
140
141
142
143
def normalize(x, window = None, level = None, in_min = -1000.0, in_max = 600.0, center=0.0):
144
    """
145
    Normalize array to values between 0 and 1, possibly clipping
146
    """
147
    assert type(x) is np.ndarray
148
149
    if (not window is None) & (not level is None) :
150
        in_min = level - (window / 2)
151
        in_max = level + (window / 2)
152
153
    x = x - in_min                 # add zero point
154
    x = x / (in_max - in_min)      # scale to unit
155
    x = x + center                 # adjust white-balance
156
    x = np.clip(x, 0.0, 1.0)       # clip to (0,1)
157
    return x
158
159
def normalized_to_8bit(x):
160
    assert ((x.min() >= 0) & (x.max() <= 1))
161
    x = (255 * x)
162
    return x.astype(np.uint8)
163
164
def normalize_to_8bit(x, *args, **kwargs):
165
    return normalized_to_8bit(normalize(x, *args, **kwargs))
166
167
def pwr_transform(x, train_ids=None):
168
    x  = np.array(x).reshape(-1,1)
169
    pt = PowerTransformer(method="yeo-johnson")
170
    if train_ids is None:
171
        pt.fit(x)
172
    else:
173
        pt.fit(x[train_ids])
174
    y = pt.transform(x)
175
    return np.squeeze(y)