a b/dsb2018_topcoders/albu/src/submit.py
1
import numpy as np
2
import pandas as pd
3
from scipy.misc import imread
4
import cv2
5
import os
6
from scipy import ndimage as ndi
7
from skimage.morphology import remove_small_objects, watershed, remove_small_holes
8
from skimage import measure
9
# Run-length encoding stolen from https://www.kaggle.com/rakhlin/fast-run-length-encoding-python
10
def rle_encoding(x):
    """Run-length encode the pixels of ``x`` that are equal to 1.

    The array is transposed and flattened before encoding, so pixel
    positions are counted down the first axis first, and positions are
    reported 1-based.

    Args:
        x: 2-D array-like mask; entries equal to 1 (or ``True``) are encoded.

    Returns:
        A flat list of plain Python ints ``[start_1, length_1, start_2,
        length_2, ...]``; an empty list when no entry equals 1.
    """
    dots = np.flatnonzero(np.asarray(x).T.ravel() == 1)
    if dots.size == 0:
        return []

    # A new run begins wherever consecutive hit positions are not adjacent.
    breaks = np.flatnonzero(np.diff(dots) > 1) + 1
    starts = dots[np.concatenate(([0], breaks))]
    ends = dots[np.concatenate((breaks - 1, [dots.size - 1]))]

    # Interleave 1-based starts with run lengths.
    run_lengths = np.empty(2 * starts.size, dtype=np.int64)
    run_lengths[0::2] = starts + 1
    run_lengths[1::2] = ends - starts + 1
    return run_lengths.tolist()
19
20
21
def prob_to_rles(lab_img):
    """Yield one run-length encoding per labelled object in ``lab_img``.

    Only label ids that actually occur in the image are encoded, in
    ascending order, and ids that are not positive (the background) are
    skipped. Ids missing from the image therefore never produce an empty
    run list, and an empty image simply yields nothing.

    Args:
        lab_img: integer label image; 0 is treated as background.

    Yields:
        The ``rle_encoding`` result for the mask of each present label.
    """
    for label_id in np.unique(lab_img):
        if label_id > 0:
            yield rle_encoding(lab_img == label_id)
25
26
27
def my_watershed(what, mask1, mask2):
    """Run a marker-based watershed restricted to ``mask1``.

    The connected components of ``mask2`` are labelled and used as the
    markers; ``what`` is the image handed to ``watershed`` and ``mask1``
    limits where labels may spread. Watershed lines are requested
    (``watershed_line=True``).

    Args:
        what: image passed as the first argument of ``watershed``.
        mask1: mask passed to ``watershed`` as ``mask``.
        mask2: seed mask whose connected components become the markers.

    Returns:
        The label image returned by ``watershed``.
    """
    seed_labels = ndi.label(mask2, output=np.uint32)[0]
    return watershed(what, seed_labels, mask=mask1, watershed_line=True)
37
38
39
def calc_score(labels, y_pred):
    """Score a predicted label image against a reference label image.

    For each IoU threshold in ``np.arange(0.5, 1.0, 0.05)`` the value
    ``TP / (TP + FP + FN)`` is computed over objects, and the values are
    averaged.

    Label ids are remapped to consecutive indices first, so ids with gaps
    (e.g. 0, 1, 5) are handled correctly. The smallest id of each image is
    treated as background and excluded (expected to be 0 -- an image with
    no background pixel would lose its first object; verify against caller).

    Args:
        labels: reference label image.
        y_pred: predicted label image with the same number of pixels.

    Returns:
        ``(mean_precision, per_threshold_precisions)``. When neither image
        contains any object the precision at every threshold is defined as
        1.0 instead of the NaN produced by ``0 / 0``.
    """
    true_ids, true_idx = np.unique(labels, return_inverse=True)
    pred_ids, pred_idx = np.unique(y_pred, return_inverse=True)
    true_objects = len(true_ids)
    pred_objects = len(pred_ids)

    # Contingency table: entry (i, j) counts pixels that carry true index i
    # and predicted index j. ravel() keeps this independent of the shape
    # NumPy gives the inverse array.
    pair_codes = true_idx.ravel().astype(np.int64) * pred_objects + pred_idx.ravel()
    intersection = np.bincount(pair_codes, minlength=true_objects * pred_objects)
    intersection = intersection.reshape(true_objects, pred_objects).astype(np.float64)

    # Object areas are the row / column totals of the table.
    area_true = intersection.sum(axis=1, keepdims=True)
    area_pred = intersection.sum(axis=0, keepdims=True)

    union = area_true + area_pred - intersection

    # Exclude background from the analysis.
    intersection = intersection[1:, 1:]
    union = union[1:, 1:]
    union[union == 0] = 1e-9

    iou = intersection / union

    def precision_at(threshold, iou):
        matches = iou > threshold
        true_positives = np.sum(matches, axis=1) == 1  # correctly found objects
        false_positives = np.sum(matches, axis=0) == 0  # extra predicted objects
        false_negatives = np.sum(matches, axis=1) == 0  # missed true objects
        return np.sum(true_positives), np.sum(false_positives), np.sum(false_negatives)

    prec = []
    for t in np.arange(0.5, 1.0, 0.05):
        tp, fp, fn = precision_at(t, iou)
        denom = tp + fp + fn
        # Nothing to find and nothing predicted: count as a perfect result.
        prec.append(tp / denom if denom > 0 else 1.0)
    return np.mean(prec), prec
83
84
85
def wsh(mask_img, threshold, border_img, seeds):
    """Threshold the inputs into seed and body masks and watershed them.

    Seeds are the pixels where ``seeds * border_img`` exceeds
    ``threshold + 0.35``; seed components smaller than 10 pixels are
    dropped. The body mask is ``mask_img > threshold`` with small holes
    (``remove_small_holes(..., 1000)``) filled and components smaller than
    8 pixels removed. The body mask is then split with ``my_watershed``
    using the seed mask as markers.

    The caller's ``mask_img`` is not modified.

    Args:
        mask_img: array thresholded at ``threshold`` to form the body mask.
        threshold: scalar cut-off for the body mask; the seed cut-off is
            0.35 higher.
        border_img: array multiplied element-wise with ``seeds``.
        seeds: array multiplied element-wise with ``border_img``.

    Returns:
        The label image produced by ``my_watershed``.
    """
    seed_cutoff = threshold + 0.35
    seed_mask = (seeds * border_img) > seed_cutoff
    seed_mask = remove_small_objects(seed_mask, 10).astype(np.uint8)

    # Build the body mask by comparison so the input array stays untouched
    # and the boolean dtype does not rely on the removed ``np.bool`` alias.
    body_mask = np.asarray(mask_img) > threshold
    body_mask = remove_small_holes(body_mask, 1000)
    body_mask = remove_small_objects(body_mask, 8).astype(np.uint8)

    return my_watershed(body_mask, body_mask, seed_mask)
102
103
def postprocess_victor(pred):
    """Turn a multi-channel prediction image into a watershed label image.

    Channel 2 scaled to [0, 1] is multiplied by one minus channel 1 and
    thresholded at 0.5. The connected components of that mask
    (8-connectivity) with fewer than 12 pixels are discarded, the rest are
    relabelled and used as markers for a watershed on ``255 - channel 2``,
    restricted to pixels where channel 2 exceeds 80.

    Args:
        pred: prediction array indexed as ``pred[..., channel]``; values are
            divided by 255, so presumably 8-bit -- TODO confirm with caller.

    Returns:
        The label image returned by ``watershed``.
    """
    av_pred = pred / 255.
    av_pred = av_pred[..., 2] * (1 - av_pred[..., 1])
    av_pred = (av_pred > 0.5).astype(np.uint8)

    # connectivity=2 is 8-connectivity in 2-D; it replaces the removed
    # ``neighbors=8`` keyword and is accepted by old and new scikit-image.
    y_pred = measure.label(av_pred, connectivity=2, background=0)

    # Drop components under 12 pixels with one area lookup instead of
    # rescanning the whole image once per region.
    areas = np.bincount(y_pred.ravel(), minlength=1)
    too_small = areas < 12
    too_small[0] = False  # never touch the background
    y_pred[too_small[y_pred]] = 0
    y_pred = measure.label(y_pred, connectivity=2, background=0)

    nucl_msk = (255 - pred[..., 2]).astype('uint8')
    y_pred = watershed(nucl_msk, y_pred, mask=(pred[..., 2] > 80), watershed_line=True)
    return y_pred
120
121
# test_dir = r'C:\dev\dsbowl\results_test\bowl_remap3\merged'
122
# borders_dir = r'C:\dev\dsbowl\results_test\bowl_remap_border2\merged'
123
124
# Mode switch: True scores the predictions against the label images in
# labels_dir and prints the mean score; False writes binary masks and a
# submission CSV instead.
oof = True
if not oof:
    test_dir = r'd:\tmp\bowl\results_test\dpn_softmax3\merged'
else:
    test_dir = r'c:\dev\dsbowl\results\dpn_sigm_f0'
labels_dir = r'D:\dsbowl\train_imgs\labels_all6'

# Load every prediction image found in test_dir.
im_names = os.listdir(test_dir)
test_ids = [os.path.splitext(fname)[0] for fname in im_names]
preds_test = [imread(os.path.join(test_dir, fname), mode='RGB') for fname in im_names]
if oof:
    # Reference label images share the prediction's base name, as .tif.
    pred_labels = [
        cv2.imread(os.path.join(labels_dir, stem + '.tif'), cv2.IMREAD_UNCHANGED)
        for stem in test_ids
    ]

new_test_ids = []
rles = []
scores = []
for n, (id_, pred) in enumerate(zip(test_ids, preds_test)):
    # Channel 2 is used as both the mask and the seed input, channel 1
    # (inverted) as the border input; postprocess_victor(pred) is the
    # alternative post-processing.
    test_img = wsh(pred[..., 2] / 255., 0.3, 1 - pred[..., 1] / 255., pred[..., 2] / 255)
    if oof:
        test_img = ndi.label(test_img, output=np.uint32)[0]
        scores.append(calc_score(pred_labels[n], test_img)[0])
    else:
        binary = (test_img > 0).astype(np.uint8) * 255
        cv2.imwrite(os.path.join(r'D:\tmp\bowl\res_bin', im_names[n]), binary)
    image_rles = list(prob_to_rles(test_img))
    rles += image_rles
    new_test_ids += [id_] * len(image_rles)

if oof:
    print(np.mean(scores))
else:
    # One CSV row per object: image id plus space-separated run lengths.
    encoded = [' '.join(map(str, runs)) for runs in rles]
    sub = pd.DataFrame()
    sub['ImageId'] = new_test_ids
    sub['EncodedPixels'] = encoded
    sub.to_csv('sub-dsbowl2018-1.csv', index=False)