In [None]:
####################
###  LIBRARIES  ####
####################

import numpy as np
import warnings
import pandas as pd
import os
import matplotlib.pyplot as plt
import cv2

# Remove TensorFlow warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Import TensorFlow and Keras for neural network operations
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import Loss
from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()

# Keras layers and weights are created as "float32"
keras.backend.set_floatx("float32")

# Report how many GPUs TensorFlow can see on this machine
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  0


In [None]:
####################
### DATA LOADING ###
####################

print('Starting preprocessing of bags')

# Folder that holds the test images
test_images_dir = './test/'

# Names of every file currently present in that folder
test_files = os.listdir(test_images_dir)

# Instance/bag table describing the test split
test_bags = pd.read_csv("./tables/testing_example.csv")

# Map each file on disk to the '.dcm' instance name used in the table
# (the last four characters, i.e. the extension, are swapped for '.dcm'),
# then keep only the table rows whose image is actually available.
test_files_dcm = list(map(lambda fname: fname[:-4] + '.dcm', test_files))
test_bags = test_bags.loc[test_bags['instance_name'].isin(test_files_dcm)]

Starting preprocessing of bags


In [None]:
##########################
### BAGS PREPROCESSING ###
##########################

# Set the desired bag size
bag_size = 57

# Pad every bag up to `bag_size` instances with 'all_zeros' placeholders.
# The placeholder rows are cloned from the first row of the bag (so they keep
# its bag_name / bag_label and column dtypes) with instance_label forced to 0.
# All padding is collected in a list and concatenated once: DataFrame.append
# was removed in pandas 2.0 and appending row by row is quadratic.
padding_frames = []
for bag_name, bag_rows in test_bags.groupby('bag_name', sort=False):
    num_add = bag_size - len(bag_rows)
    if num_add < 0:
        # Oversized bags are left untouched (same as before) but no longer
        # pass silently: the generator cannot fit them into a fixed-size batch.
        warnings.warn(
            "Bag {!r} has {} instances, more than bag_size={}; it is not padded "
            "and will not fit the fixed-size input tensor.".format(bag_name, len(bag_rows), bag_size)
        )
        continue
    if num_add == 0:
        continue

    template = bag_rows.iloc[[0]].assign(instance_label=0, instance_name='all_zeros')
    # Repeat the single template row `num_add` times (index label is repeated too).
    padding_frames.append(template.iloc[[0] * num_add])

if padding_frames:
    added_test_bags = pd.concat(padding_frames)
    test_bags = pd.concat([test_bags, added_test_bags])
else:
    # Nothing to pad (e.g. the cell was re-run on already padded bags).
    added_test_bags = pd.DataFrame()

# Convert bags data to a dictionary {bag_name: [instance_name, ...]} so the
# generator does not have to filter the DataFrame for every batch.
# sort=False keeps bags in order of first appearance; rows keep their order.
test_bags_dic = {
    bag_name: list(bag_rows.instance_name)
    for bag_name, bag_rows in test_bags.groupby('bag_name', sort=False)
}

In [None]:
####################
###  DATALOADER  ###
####################
# Layout of one bag fed to the network: (height, width, instances per bag)
dim = (512, 512, bag_size)

class DataGeneratorMIL(keras.utils.Sequence):
    """Keras Sequence that yields batches of fixed-size MIL bags.

    Each sample is one bag: the images of all its instances stacked into an
    array of shape ``(*dim, n_channels)`` where ``dim = (height, width,
    instances_per_bag)``. Instance names are looked up in the module-level
    dictionaries ``train_bags_dic`` / ``test_bags_dic`` and the images are read
    from ``train_files_loc[...]`` / ``test_images_dir`` as ``.npy`` files, so
    those globals must exist before a batch is requested.

    Parameters
    ----------
    list_IDs : sequence of bag identifiers (keys of the bag dictionaries).
    labels : optional sequence of bag labels, positionally aligned with
        ``list_IDs``. Only used in training mode.
    batch_size : number of bags per batch.
    dim : ``(height, width, instances_per_bag)``.
    n_channels : number of image channels.
    n_classes : stored but not used by the generator itself.
    shuffle : reshuffle the bag order after every epoch (training mode only).
    is_train : training mode is active only when this is True *and* labels
        were given; otherwise batches contain inputs only.
    """

    def __init__(self, list_IDs, labels=None, batch_size=256, dim=(512,512,512), n_channels=3,
                 n_classes=2, shuffle=True, is_train=True):
        """Store the configuration and build the initial sample order."""
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.is_train = (labels is not None) and is_train
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch.

        Training keeps the original behaviour (incomplete trailing batch is
        dropped). In inference mode the trailing partial batch is kept so that
        ``predict`` returns exactly one row per bag in ``list_IDs``.
        """
        n_samples = len(self.list_IDs)
        if self.is_train:
            return n_samples // self.batch_size
        return int(np.ceil(n_samples / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index``: ``(X, y)`` when training, else ``X``."""
        # Positions of the samples in this batch, honouring the epoch order.
        batch_idx = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in batch_idx]

        # X is freshly allocated for every batch, so it is returned without copying.
        X = self.__data_generation(list_IDs_temp)
        if self.is_train:
            # Labels are picked with the same positions as the inputs.
            y = np.asarray(self.labels)[batch_idx]
            return X, np.array(y, dtype='float64')
        return X

    def on_epoch_end(self):
        """Rebuild the sample order; shuffle it when training with shuffle=True.

        Inference order is never shuffled so predictions stay aligned with
        ``list_IDs``.
        """
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle and self.is_train:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the bags named in ``list_IDs_temp``.

        Returns an array of shape ``(len(list_IDs_temp), *dim, n_channels)``.
        """
        # Sized by the actual number of bags so a short final batch does not
        # carry uninitialised rows.
        X = np.empty((len(list_IDs_temp), *self.dim, self.n_channels))

        for i, ID in enumerate(list_IDs_temp):
            # Instance names of this bag (module-level lookup tables).
            ids = train_bags_dic[ID] if self.is_train else test_bags_dic[ID]
            imgs = []
            for idx in ids:
                if idx == 'all_zeros':
                    # Padding instance: a blank image.
                    imgs.append(np.zeros((self.dim[0], self.dim[1], self.n_channels)))
                    continue
                # Instance names carry a 4-character extension that is swapped for '.npy'.
                base_dir = train_files_loc[idx] if self.is_train else test_images_dir
                img = np.load(base_dir + idx[:-4] + '.npy')
                # cv2.resize takes (width, height).
                imgs.append(cv2.resize(img, (self.dim[1], self.dim[0])))
            # (instances, H, W, C) -> (H, W, instances, C)
            X[i,] = np.transpose(imgs, [1,2,0,3])

        return X

In [None]:
########################
### Test Generator ###
########################
batch_size = 1

# Preparing the test dataset: one row per bag. max() collapses the instance
# rows of a bag; bag_label is assumed to be constant within a bag — TODO confirm.
bags2 = test_bags.groupby('bag_name').max()

# Predictions are later compared positionally with bags2.bag_label, so the
# generator must walk the bags in bags2 order: shuffling is disabled explicitly
# and the batch size comes from the variable above instead of a second literal.
test_dataset = DataGeneratorMIL(
    np.array(bags2.index),
    bags2.bag_label,
    batch_size=batch_size,
    dim=dim,
    shuffle=False,
    is_train=False,
)

In [None]:
####################
###    MODEL     ###
####################

# MILAttentionLayer
class MILAttentionLayer(layers.Layer):
    """Implementation of the attention-based Deep MIL layer.

    Takes per-instance feature vectors of shape ``(n_instances_total,
    features)`` and returns attention weights of shape ``(n_bags, bag_size)``
    that sum to 1 within each bag.

    Parameters
    ----------
    weight_params_dim : width of the hidden attention projection.
    kernel_initializer : initializer used for all weight matrices.
    kernel_regularizer : regularizer used for all weight matrices.
    use_gated : add the sigmoid gate branch (weights ``u``).
    bag_size : number of instances per bag. When left as ``None`` the layer
        falls back to the module-level ``dim[2]`` at call time, which is what
        the layer used unconditionally before this argument existed.
    **kwargs : forwarded to ``layers.Layer``.
    """

    def __init__(
        self,
        weight_params_dim,
        kernel_initializer="glorot_uniform",
        kernel_regularizer=None,
        use_gated=False,
        bag_size=None,
        **kwargs,
    ):
        super().__init__(**kwargs)

        self.weight_params_dim = weight_params_dim
        self.use_gated = use_gated
        self.bag_size = bag_size

        self.kernel_initializer = keras.initializers.get(kernel_initializer)
        self.kernel_regularizer = keras.regularizers.get(kernel_regularizer)

        # The three weight matrices share one initializer / regularizer.
        self.v_init = self.kernel_initializer
        self.w_init = self.kernel_initializer
        self.u_init = self.kernel_initializer

        self.v_regularizer = self.kernel_regularizer
        self.w_regularizer = self.kernel_regularizer
        self.u_regularizer = self.kernel_regularizer

    def build(self, input_shape):
        """Create v (features x hidden), w (hidden x 1) and, if gated, u."""
        input_dim = input_shape[1]

        self.v_weight_params = self.add_weight(
            shape=(input_dim, self.weight_params_dim),
            initializer=self.v_init,
            name="v",
            regularizer=self.v_regularizer,
            trainable=True,
        )

        self.w_weight_params = self.add_weight(
            shape=(self.weight_params_dim, 1),
            initializer=self.w_init,
            name="w",
            regularizer=self.w_regularizer,
            trainable=True,
        )

        if self.use_gated:
            self.u_weight_params = self.add_weight(
                shape=(input_dim, self.weight_params_dim),
                initializer=self.u_init,
                name="u",
                regularizer=self.u_regularizer,
                trainable=True,
            )
        else:
            self.u_weight_params = None

        self.input_built = True

    def call(self, inputs):
        """Return softmax-normalised attention weights, one row per bag."""
        # Explicit constructor value wins; otherwise use the notebook-level dim.
        bag_size = dim[2] if self.bag_size is None else self.bag_size
        scores = self.compute_attention_scores(inputs)
        # One score per instance -> regroup the instances bag by bag.
        scores = tf.reshape(scores, shape=(-1, bag_size))
        alpha = tf.math.softmax(scores, axis=1)
        return alpha

    def compute_attention_scores(self, instance):
        """Compute one raw (un-normalised) score per instance.

        score = (tanh(x . v) [* sigmoid(x . u) when gated]) . w
        """
        original_instance = instance
        instance = tf.math.tanh(tf.tensordot(instance, self.v_weight_params, axes=1))

        if self.use_gated:
            instance = instance * tf.math.sigmoid(
                tf.tensordot(original_instance, self.u_weight_params, axes=1)
            )

        return tf.tensordot(instance, self.w_weight_params, axes=1)

# Model
num_data = batch_size
D = bag_size

# Convolution layers are created once at module level and applied inside VGG().
# First stage: 16 filters, 5x5 kernel, 'same' padding.
Conv1 = layers.Conv2D(
    16,
    (5, 5),
    data_format="channels_last",
    activation='relu',
    kernel_initializer='glorot_uniform',
    padding='same',
)
# Remaining five stages: 32 filters, 3x3 kernel, default padding.
# The generator is consumed left to right, so the layers are created in the
# order Conv2 ... Conv6.
Conv2, Conv3, Conv4, Conv5, Conv6 = (
    layers.Conv2D(32, (3,3), data_format="channels_last", activation='relu')
    for _ in range(5)
)

def VGG(inp):
    """Extract one flat feature vector per instance.

    ``inp`` has the bag axis at position 3; it is moved next to the batch axis
    and merged with it, so every instance becomes an independent
    ``(dim[0], dim[1], 3)`` image. Each of the six stages applies
    conv -> batch norm -> 2x2 max pool, optionally followed by dropout(0.3),
    and the result is flattened.
    """
    # (conv layer, whether dropout follows the pooling step)
    stages = (
        (Conv1, False),
        (Conv2, True),
        (Conv3, False),
        (Conv4, False),
        (Conv5, True),
        (Conv6, True),
    )

    per_instance = tf.transpose(inp, perm=(0,3,1,2,4))
    x = tf.reshape(per_instance, shape=(-1, dim[0], dim[1], 3))

    for conv, with_dropout in stages:
        x = conv(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPool2D((2, 2), strides=(2, 2), data_format="channels_last")(x)
        if with_dropout:
            x = layers.Dropout(0.3)(x)

    return layers.Flatten()(x)

def build_model():
    """Assemble the attention-MIL classifier.

    Input: one bag of shape ``(*dim, 3)``. Instance features from ``VGG`` are
    weighted by the gated attention layer (named "alpha"), summed per bag, and
    passed through dropout and two dense layers ending in a sigmoid unit.
    """
    bag_input = keras.Input(shape=(*dim, 3))

    # Flat features, one row per instance.
    instance_feats = VGG(bag_input)

    attention = MILAttentionLayer(
        weight_params_dim=64,
        kernel_regularizer=keras.regularizers.l2(0.01),
        use_gated=True,
        name="alpha",
    )(instance_feats)

    # Regroup the instance rows bag by bag: (bags, instances, features).
    feature_dim = instance_feats.shape[1]
    bag_feats = tf.reshape(instance_feats, shape=(-1, dim[2], feature_dim))

    # (bags, 1, instances) @ (bags, instances, features) -> attention-weighted sum.
    attention = tf.expand_dims(attention, axis=1)
    pooled = tf.linalg.matmul(attention, bag_feats)
    pooled = tf.squeeze(pooled, axis=1)

    pooled = layers.Dropout(0.25)(pooled)
    pooled = layers.Dense(128)(pooled)
    bag_prob = layers.Dense(1, activation='sigmoid')(pooled)

    return keras.Model(inputs=bag_input, outputs=bag_prob)

# Build the network once; the evaluation cells load fold checkpoints into it.
model = build_model()
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line.
model.summary()

Instructions for updating:
Colocations handled automatically by placer.


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 512, 512, 5  0           []                               
                                7, 3)]                                                            
                                                                                                  
 tf_op_layer_transpose (TensorF  [(None, 57, 512, 51  0          ['input_1[0][0]']                
 lowOpLayer)                    2, 3)]                                                            
                                                                                                  
 tf_op_layer_Reshape (TensorFlo  [(None, 512, 512, 3  0          ['tf_op_layer_transpose[0][0]']  
 wOpLayer)                      )]                                                            

In [None]:
####################
###    Evaluate     ###
####################
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
import time

# Ground-truth bag labels. test_dataset was built from bags2.index, so the
# rows of `preds` line up with this Series. It does not change between folds.
target = bags2.bag_label

for i in range(0, 5):

    # Perform prediction on the test dataset using the saved checkpoint
    # NOTE(review): the slice-level prediction cell loads the same folds from
    # "./models/att_{}.ckpt" — confirm which location is the right one.
    checkpoint_path = "./att_{}.ckpt".format(i)
    model.load_weights(checkpoint_path)

    # perf_counter measures elapsed wall-clock time; process_time would report
    # CPU time summed over all threads, which is not the inference latency.
    start = time.perf_counter()
    preds = model.predict(test_dataset)
    print('time:', time.perf_counter() - start)

    # print('AUC:', roc_auc_score(target[:], preds[:, 0]))
    # Threshold the sigmoid output at 0.5 to get hard bag labels.
    preds_value = (preds[:, 0] > 0.5) * 1
    print('Accuracy:', accuracy_score(target, preds_value))
    print('Precision:', precision_score(target, preds_value))
    print('Recall:', recall_score(target, preds_value))
    print('F1 score:', f1_score(target, preds_value))

In [1]:
####################
###    Slice Labels Predictions    ###
####################
# Sub-model returning both the attention weights (layer "alpha") and the bag
# probability. It shares its layers with `model`, so it is built once and
# picks up whatever weights are loaded into `model` for each fold.
weights_layer = model.get_layer("alpha")
feature_model = keras.Model([model.inputs], [weights_layer.output, model.output])

for fold in range(0, 5):

    # Perform prediction on the test dataset using the saved checkpoint
    # NOTE(review): the evaluation cell loads the same folds from
    # "./att_{}.ckpt" — confirm which location is the right one.
    checkpoint_path = "./models/att_{}.ckpt".format(fold)
    model.load_weights(checkpoint_path)

    weights, pred = feature_model.predict(test_dataset)

    # Flatten (bag, instance) into one row per instance. Padding instances
    # ('all_zeros') are included, as before.
    instance_id = []
    bag_id = []
    pred_bag_id = []
    pred_instance_id = []
    # Distinct loop names: the bag position no longer shadows the fold index.
    for bag_pos, ID in enumerate(np.array(bags2.index)):
        ids = test_bags_dic[ID]
        for inst_pos, idx in enumerate(ids):
            instance_id.append(idx)
            bag_id.append(ID)
            pred_bag_id.append(pred[bag_pos][0])
            pred_instance_id.append(weights[bag_pos, inst_pos])

    df_rest = df = pd.DataFrame(list(zip(instance_id, bag_id, pred_bag_id, pred_instance_id)),
            columns =['instance_name', 'bag_name', 'bag_cnn_probability', 'cnn_prediction'])

    # One file per fold: writing every fold to the same name kept only the
    # last fold and silently discarded the others.
    df_rest.to_csv('test_visual_fold{}.csv'.format(fold))

# Kept for backward compatibility: same name and content as before
# (the table of the last fold).
df_rest.to_csv('test_visual.csv')
