"""Turn saved network predictions into instance labels, then score or submit.

Run as a script.  With ``OOF`` enabled, every prediction image found in the
prediction directory is post-processed with :func:`wsh`, scored against the
label image of the same name in ``LABELS_DIR`` and the mean score is printed.
Otherwise binary masks are written to ``BINARY_OUT_DIR`` and a run-length
encoded submission CSV is written to ``SUBMISSION_CSV``.

scikit-image and OpenCV are imported inside the functions that need them so
that the NumPy-only helpers (``rle_encoding``, ``prob_to_rles``,
``calc_score``) can be imported without those packages installed, and so the
version-dependent location of ``watershed`` is resolved in a single place.
"""
import os

import numpy as np
import pandas as pd
from scipy import ndimage as ndi

# --- Run configuration ------------------------------------------------------
# True: score out-of-fold predictions; False: build a submission file.
OOF = True
OOF_TEST_DIR = r'c:\dev\dsbowl\results\dpn_sigm_f0'
SUBMIT_TEST_DIR = r'd:\tmp\bowl\results_test\dpn_softmax3\merged'
LABELS_DIR = r'D:\dsbowl\train_imgs\labels_all6'
BINARY_OUT_DIR = r'D:\tmp\bowl\res_bin'
SUBMISSION_CSV = 'sub-dsbowl2018-1.csv'
# Threshold handed to wsh() for every image.
MASK_THRESHOLD = 0.3


def _load_watershed():
    """Return scikit-image's ``watershed`` wherever this version keeps it."""
    try:
        from skimage.segmentation import watershed
    except ImportError:
        # Older scikit-image releases only expose it from skimage.morphology.
        from skimage.morphology import watershed
    return watershed


def _read_rgb(path):
    """Read the image at *path* as an RGB array.

    Uses ``scipy.misc.imread(path, mode='RGB')`` when the installed SciPy still
    provides it; otherwise falls back to OpenCV and reorders BGR to RGB.

    Raises:
        IOError: if the OpenCV fallback cannot read the file.
    """
    try:
        from scipy.misc import imread  # removed from recent SciPy releases
    except ImportError:
        import cv2
        bgr = cv2.imread(path, cv2.IMREAD_COLOR)
        if bgr is None:
            raise IOError('cannot read image: {}'.format(path))
        return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    return imread(path, mode='RGB')


# Run-length encoding adapted from
# https://www.kaggle.com/rakhlin/fast-run-length-encoding-python
def rle_encoding(x):
    """Run-length encode the pixels of *x* that equal 1.

    Pixels are numbered from 1 in column-major order (the array is transposed
    before flattening).

    Args:
        x: 2-D array; entries equal to 1 (or ``True``) are foreground.

    Returns:
        A flat list ``[start, length, start, length, ...]`` of Python ints;
        empty when *x* has no foreground pixel.
    """
    dots = np.where(x.T.flatten() == 1)[0]
    run_lengths = []
    prev = -2  # guarantees the first foreground pixel opens a new run
    for b in dots:
        b = int(b)  # plain ints keep the CSV text independent of NumPy's repr
        if b > prev + 1:
            run_lengths.extend((b + 1, 0))
        run_lengths[-1] += 1
        prev = b
    return run_lengths


def prob_to_rles(lab_img):
    """Yield the run-length encoding of each label ``1..lab_img.max()``.

    Args:
        lab_img: integer label image; 0 is skipped.

    Yields:
        One :func:`rle_encoding` list per label value, in increasing order.
        A label value absent from the image yields an empty list.
    """
    for i in range(1, int(lab_img.max()) + 1):
        yield rle_encoding(lab_img == i)


def my_watershed(what, mask1, mask2):
    """Label *mask1* by watershed flooding from the components of *mask2*.

    Args:
        what: image passed to ``watershed`` as the landscape to flood.
        mask1: region inside which labels may be assigned.
        mask2: seed mask; its connected components (``scipy.ndimage.label``
            with the default structuring element) become the markers.

    Returns:
        Integer label image produced with ``watershed_line=True``.
    """
    watershed = _load_watershed()
    markers = ndi.label(mask2, output=np.uint32)[0]
    return watershed(what, markers, mask=mask1, watershed_line=True)


def _sequential_ids(label_img):
    """Map the label values of *label_img* onto ``0..n-1`` with 0 = background.

    Returns:
        ``(index, n)`` where ``index`` is the flattened image rewritten with
        consecutive ids and ``n`` is the number of ids including background.
        Index 0 is reserved for background even when no pixel has value 0.
    """
    ids, index = np.unique(np.asarray(label_img).ravel(), return_inverse=True)
    index = index.ravel()
    count = ids.size
    if count == 0 or ids[0] != 0:
        # No background pixel present: shift so that slot 0 stays background.
        index = index + 1
        count += 1
    return index, count


def calc_score(labels, y_pred):
    """Score a predicted label image against a reference label image.

    For each IoU threshold in ``np.arange(0.5, 1.0, 0.05)`` the value
    ``TP / (TP + FP + FN)`` is computed over objects, where two objects match
    when their IoU exceeds the threshold.

    Args:
        labels: reference label image; 0 is background. Label values need not
            be consecutive but are assumed to be non-negative.
        y_pred: predicted label image with the same number of pixels.

    Returns:
        ``(mean_precision, per_threshold_precisions)``.  When neither image
        contains any object every precision is defined as 1.0 (nothing to
        find, nothing predicted) instead of the NaN that 0/0 would give.

    Raises:
        ValueError: if the two images differ in pixel count.
    """
    true_idx, true_objects = _sequential_ids(labels)
    pred_idx, pred_objects = _sequential_ids(y_pred)
    if true_idx.size != pred_idx.size:
        raise ValueError('labels and y_pred must have the same number of pixels')

    # Joint pixel counts: intersection[i, j] = |true object i AND pred object j|.
    # Exact integer counting, so it does not depend on how ids are spaced.
    intersection = np.bincount(
        true_idx * pred_objects + pred_idx,
        minlength=true_objects * pred_objects,
    ).reshape(true_objects, pred_objects).astype(np.float64)

    # Areas (needed for the union) are the marginals of the joint counts.
    area_true = intersection.sum(axis=1, keepdims=True)
    area_pred = intersection.sum(axis=0, keepdims=True)
    union = area_true + area_pred - intersection

    # Exclude background from the analysis.
    intersection = intersection[1:, 1:]
    union = union[1:, 1:]
    union[union == 0] = 1e-9

    iou = intersection / union  # rows: true objects, columns: predictions

    def precision_at(threshold, iou):
        matches = iou > threshold
        true_positives = np.sum(matches, axis=1) == 1   # true object matched once
        false_positives = np.sum(matches, axis=0) == 0  # prediction matching nothing (extra)
        false_negatives = np.sum(matches, axis=1) == 0  # true object never matched (missed)
        return np.sum(true_positives), np.sum(false_positives), np.sum(false_negatives)

    prec = []
    for t in np.arange(0.5, 1.0, 0.05):
        tp, fp, fn = precision_at(t, iou)
        total = int(tp) + int(fp) + int(fn)
        prec.append(float(tp) / total if total else 1.0)
    return np.mean(prec), prec


def wsh(mask_img, threshold, border_img, seeds,
        seed_offset=0.35, min_seed_area=10, max_hole_area=1000,
        min_object_area=8):
    """Split a thresholded mask into instances with a seeded watershed.

    Seeds are the pixels where ``seeds * border_img`` exceeds
    ``threshold + seed_offset``; the region to label is ``mask_img >
    threshold`` after small holes and small objects are removed.  None of the
    input arrays is modified.

    Args:
        mask_img: array thresholded with *threshold* to get the foreground.
        threshold: foreground threshold.
        border_img: array multiplied with *seeds* before thresholding
            (the caller in this file passes one minus a prediction channel).
        seeds: array multiplied with *border_img* to form the seed score.
        seed_offset: added to *threshold* for the seed mask.
        min_seed_area: size passed to ``remove_small_objects`` for the seeds.
        max_hole_area: size passed to ``remove_small_holes`` for the mask.
        min_object_area: size passed to ``remove_small_objects`` for the mask.

    Returns:
        Integer label image from :func:`my_watershed`.
    """
    from skimage.morphology import remove_small_holes, remove_small_objects

    seed_mask = (seeds * border_img) > threshold + seed_offset
    seed_mask = remove_small_objects(seed_mask, min_seed_area).astype(np.uint8)

    # Build a fresh boolean mask rather than thresholding mask_img in place.
    foreground = np.asarray(mask_img) > threshold
    foreground = remove_small_holes(foreground, max_hole_area)
    foreground = remove_small_objects(foreground, min_object_area).astype(np.uint8)

    return my_watershed(foreground, foreground, seed_mask)


def postprocess_victor(pred):
    """Alternative post-processing: seeded watershed on the raw prediction.

    Args:
        pred: prediction array indexed as ``pred[..., channel]`` with values
            on a 0-255 scale; channels 1 and 2 are used.
            Assumes a single (H, W, C) image - TODO confirm with callers.

    Returns:
        Integer label image.
    """
    from skimage import measure
    watershed = _load_watershed()

    channel2 = pred[..., 2]
    scaled = pred / 255.
    seed_mask = ((scaled[..., 2] * (1 - scaled[..., 1])) > 0.5).astype(np.uint8)

    # connectivity=2 is 8-connectivity for a 2-D image.
    y_pred = measure.label(seed_mask, connectivity=2, background=0)

    # Drop seeds smaller than 12 pixels in one pass over the image.
    areas = np.bincount(y_pred.ravel())
    y_pred[(areas < 12)[y_pred]] = 0
    y_pred = measure.label(y_pred, connectivity=2, background=0)

    nucl_msk = (255 - channel2).astype('uint8')
    return watershed(nucl_msk, y_pred, mask=(channel2 > 80), watershed_line=True)


def main(oof=OOF):
    """Post-process every prediction image, then score or write a submission.

    Args:
        oof: when true, score against ``LABELS_DIR`` and print the mean score;
            otherwise write binary masks and the submission CSV.

    Raises:
        IOError: if a label image cannot be read in scoring mode.
    """
    import cv2

    test_dir = OOF_TEST_DIR if oof else SUBMIT_TEST_DIR
    # Sorted so that the output order does not depend on the file system.
    im_names = sorted(os.listdir(test_dir))

    scores = []
    new_test_ids = []
    rles = []
    # Images are processed one at a time so only one prediction is in memory.
    for im_name in im_names:
        image_id = os.path.splitext(im_name)[0]
        pred = _read_rgb(os.path.join(test_dir, im_name))
        channel2 = pred[..., 2] / 255.
        test_img = wsh(channel2, MASK_THRESHOLD, 1 - pred[..., 1] / 255., channel2)

        if oof:
            label_path = os.path.join(LABELS_DIR, image_id + '.tif')
            true_labels = cv2.imread(label_path, cv2.IMREAD_UNCHANGED)
            if true_labels is None:  # cv2.imread signals failure with None
                raise IOError('cannot read label image: {}'.format(label_path))
            test_img = ndi.label(test_img, output=np.uint32)[0]
            scores.append(calc_score(true_labels, test_img)[0])
        else:
            out_path = os.path.join(BINARY_OUT_DIR, im_name)
            written = cv2.imwrite(out_path, (test_img > 0).astype(np.uint8) * 255)
            if not written:
                # The mask dump is auxiliary output; report and carry on.
                print('warning: could not write {}'.format(out_path))
            # NOTE(review): an image with no detected object contributes no
            # row at all - confirm the submission format accepts missing ids.
            rle = list(prob_to_rles(test_img))
            rles.extend(rle)
            new_test_ids.extend([image_id] * len(rle))

    if oof:
        print(np.mean(scores))
    else:
        sub = pd.DataFrame(
            {
                'ImageId': new_test_ids,
                'EncodedPixels': [' '.join(str(y) for y in x) for x in rles],
            },
            columns=['ImageId', 'EncodedPixels'],
        )
        sub.to_csv(SUBMISSION_CSV, index=False)


if __name__ == '__main__':
    main()