In [40]:
import os
import pickle
import random
import glob
import datetime
import pandas as pd
import numpy as np
import cv2
import pydicom
from tqdm import tqdm
from joblib import delayed, Parallel
import zipfile
from pydicom.filebase import DicomBytesIO
import sys
from PIL import Image
import cv2

import keras
from keras.models import model_from_json
import tensorflow as tf
from keras.models import Sequential, Model
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, GlobalAveragePooling2D
from keras.applications.inception_v3 import InceptionV3

# importing pyplot and image from matplotlib 
import matplotlib.pyplot as plt 
import matplotlib.image as mpimg 


from keras.preprocessing import image
import albumentations as A

In [2]:
# Root directory of the dataset; later cells build the CSV and parquet paths from it.
# The value already ends in '/', so f'{base_url}/name' paths contain a double slash.
base_url = '/home/ubuntu/kaggle/rsna-intracranial-hemorrhage-detection/'

In [3]:
# Load the long-format label file (one row per image/diagnosis pair) and split each ID
# into its image part (first 12 characters) and its diagnosis part (from index 13 on;
# the character at index 12 is skipped as a separator).
train_df = (
    pd.read_csv(f'{base_url}/stage_2_train.csv')
    .drop_duplicates()
    .assign(
        ImageID=lambda frame: frame['ID'].str.slice(stop=12),
        Diagnosis=lambda frame: frame['ID'].str.slice(start=13),
    )
)

# Reshape to wide format: one row per image, one column per diagnosis.
train_labels = train_df.pivot(index="ImageID", columns="Diagnosis", values="Label")
train_labels.head()

Diagnosis,any,epidural,intraparenchymal,intraventricular,subarachnoid,subdural
ImageID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ID_000012eaf,0,0,0,0,0,0
ID_000039fa0,0,0,0,0,0,0
ID_00005679d,0,0,0,0,0,0
ID_00008ce3c,0,0,0,0,0,0
ID_0000950d7,0,0,0,0,0,0


In [20]:
# Read the train/test metadata tables and tag every row with the split it came from.
train_metadata = pd.read_parquet(f'{base_url}/train_metadata.parquet.gzip').assign(Dataset="train")
test_metadata = pd.read_parquet(f'{base_url}/test_metadata.parquet.gzip').assign(Dataset="test")

# Attach the wide label table to the training rows (index-on-index join).
train_metadata = train_metadata.join(train_labels)

# Stack both splits (columns sorted by name), order the rows by ImagePositionPatient_2
# descending, and remove one image id (the reason for excluding it is not recorded here).
metadata = (
    pd.concat([train_metadata, test_metadata], sort=True)
    .sort_values(by="ImagePositionPatient_2", ascending=False)
    .drop(['ID_6431af929'])
)

In [21]:
# Preview the first rows of the combined train + test metadata table.
metadata.head()

Unnamed: 0_level_0,BitsAllocated,BitsStored,Columns,Dataset,HighBit,ImageOrientationPatient_0,ImageOrientationPatient_1,ImageOrientationPatient_2,ImageOrientationPatient_3,ImageOrientationPatient_4,...,StudyID,StudyInstanceUID,WindowCenter,WindowWidth,any,epidural,intraparenchymal,intraventricular,subarachnoid,subdural
Image,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ID_24250ffbc,16,12,512,train,11,1.0,0.0,0.0,0.0,0.920505,...,,ID_6222a3935b,40.0,80.0,0.0,0.0,0.0,0.0,0.0,0.0
ID_6e8c8d650,16,12,512,train,11,1.0,0.0,0.0,0.0,0.920505,...,,ID_6222a3935b,40.0,80.0,0.0,0.0,0.0,0.0,0.0,0.0
ID_ac042708d,16,12,512,train,11,1.0,0.0,0.0,0.0,0.920505,...,,ID_6222a3935b,40.0,80.0,0.0,0.0,0.0,0.0,0.0,0.0
ID_d1e2a17a9,16,12,512,train,11,1.0,0.0,0.0,0.0,0.927184,...,,ID_a5fb903898,40.0,80.0,0.0,0.0,0.0,0.0,0.0,0.0
ID_e1a1b45a5,16,12,512,train,11,1.0,0.0,0.0,0.0,0.920505,...,,ID_6222a3935b,40.0,80.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
# Test rows were never joined with labels, so the six label columns are empty for them.
# Drop those columns by name (not by position, which silently removes the wrong columns
# if the column order ever changes) together with the split marker.
test_df = (
    metadata.loc[metadata['Dataset'] == 'test']
    .drop(columns=['any', 'epidural', 'intraparenchymal', 'intraventricular',
                   'subarachnoid', 'subdural', 'Dataset'])
)

In [23]:
# Keep only the training rows; the split marker is no longer needed afterwards.
# Note: this rebinds train_df, which earlier held the raw label CSV.
train_df = (
    metadata
    .loc[metadata['Dataset'] == 'train']
    .drop(columns=['Dataset'])
)

In [24]:
# (rows, columns) of the training table.
train_df.shape

(752802, 35)

In [25]:
# Label matrix: the six diagnosis columns, row-aligned with train_df.
train_y = train_df.loc[
    :,
    ['any', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural'],
]
train_y.shape

(752802, 6)

# Generator

In [26]:
from skimage.io import imread
def get_input(path):
    """Read the image file at ``path`` with ``skimage.io.imread`` and return the result."""
    return imread(path)

In [27]:
import numpy as np
import pandas as pd
from tensorflow.keras.applications.vgg19 import preprocess_input
def get_output( path, label_file = None ):
    """Return the label values for the image stored at ``path``.

    The image id is the file name up to its first '.', i.e. the last '/'-separated
    component of ``path`` with everything after the first dot removed.

    Parameters
    ----------
    path : str
        Path (or bare file name) of the image.
    label_file : pandas.DataFrame
        Label table indexed by image id. Required despite the ``None`` default, which
        is kept only so existing keyword-style calls stay valid.

    Returns
    -------
    numpy.ndarray
        The values of the row of ``label_file`` whose index equals the image id.

    Raises
    ------
    ValueError
        If ``label_file`` is not supplied.
    KeyError
        If the image id is not present in ``label_file``'s index.
    """
    if label_file is None:
        # Fail with a clear message instead of "'NoneType' object has no attribute 'loc'".
        raise ValueError("label_file is required: pass the label table indexed by image id")

    img_id = path.split('/')[-1].split('.')[0]
    return label_file.loc[img_id].values

In [46]:
def preprocess_input( image, image_size = (224, 224) ):
    """Resize ``image`` to ``image_size`` using nearest-neighbour interpolation.

    Only resizing is performed: no augmentation and no pixel-value scaling is applied.
    Earlier revisions built an albumentations pipeline and re-seeded the global ``random``
    module on every call, but the pipeline's result was never used; both were dropped
    because the pipeline was rebuilt per image for nothing and the re-seeding reset the
    process-wide RNG state for every image.

    Note that this function shadows the Keras ``preprocess_input`` imported earlier in
    the notebook.

    Parameters
    ----------
    image : numpy.ndarray
        Image array as accepted by ``cv2.resize``.
    image_size : tuple of int, optional
        Target size handed to ``cv2.resize`` as its ``dsize`` argument, i.e.
        ``(width, height)``. Defaults to ``(224, 224)``.

    Returns
    -------
    numpy.ndarray
        The resized image.
    """
    return cv2.resize(image, tuple(image_size), interpolation = cv2.INTER_NEAREST)

In [47]:
def image_generator(files, label_file, batch_size = 64,
                    image_dir = '/home/ubuntu/kaggle/rsna-intracranial-hemorrhage-detection/png/train/adjacent-brain-cropped/'):
    """Endlessly yield ``(batch_x, batch_y)`` arrays for training.

    Each pass walks ``files`` in order, in consecutive chunks of ``batch_size`` ids; the
    final chunk of a pass may be smaller. When the end is reached the generator starts
    over from the first id.

    Parameters
    ----------
    files : sequence of str
        Image ids (file names without directory or '.png' extension). Must support
        ``len`` and slicing.
    label_file : pandas.DataFrame
        Label table indexed by image id, forwarded to ``get_output``.
    batch_size : int, optional
        Maximum number of images per yielded batch.
    image_dir : str, optional
        Directory prefix that is concatenated with each id and '.png' to form the image
        path; it must end with a path separator.

    Yields
    ------
    tuple of numpy.ndarray
        ``(batch_x, batch_y)``: the stacked preprocessed images and their label rows.

    Raises
    ------
    ValueError
        On the first ``next()`` call if ``files`` is empty or ``batch_size`` is < 1.
    """
    num_files = len(files)
    if num_files == 0:
        # Without this guard the loop below would spin forever without yielding.
        raise ValueError("files is empty: nothing to build batches from")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    while True:
        # Step through the ids one batch at a time. Iterating over range(len(files))
        # here would keep slicing past the end and yield empty batches once all ids
        # have been consumed.
        for start in range(0, num_files, batch_size):
            batch_ids = files[start:start + batch_size]
            batch_input = []
            batch_output = []

            # Read each image, look up its labels, then preprocess the image.
            for image_id in batch_ids:
                image_path = image_dir + image_id + '.png'
                pixels = get_input(image_path)
                labels = get_output(image_path, label_file=label_file)

                batch_input.append(preprocess_input(image=pixels))
                batch_output.append(labels)

            # Return a tuple of (input, output) to feed the network
            yield np.array(batch_input), np.array(batch_output)

In [48]:
# Directory holding the training PNGs. The same path also appears inside image_generator,
# which does not read this variable.
train_url = '/home/ubuntu/kaggle/rsna-intracranial-hemorrhage-detection/png/train/adjacent-brain-cropped/'


In [49]:
# Endless batch generator over the training image ids (the index of train_df), with
# labels looked up in train_y and at most 32 images per batch.
train_generator = image_generator(train_df.index, train_y, batch_size = 32)

In [34]:
# Pull one (batch_x, batch_y) tuple for inspection. This advances train_generator, so
# training that reuses the same generator object continues after this batch.
l = next(train_generator)

In [35]:
# Shape of the first image in the inspected batch (l[0] is batch_x).
l[0][0].shape

(224, 224, 3)

In [53]:
# create the base pre-trained model
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(224,224,3))
# Freeze every backbone layer except the last two.
for layer in base_model.layers[:-2]:
    layer.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)
# One sigmoid per label: binary_crossentropy scores the six outputs as independent
# yes/no decisions. A softmax head forces the outputs to sum to 1, so it can never
# represent an image whose labels are all 0 (such rows exist in the label table) or
# one with several positive labels.
predictions = Dense(6, activation='sigmoid')(x)

adam = keras.optimizers.Adam(learning_rate=0.00001,
 beta_1=0.9,
 beta_2=0.999,
 amsgrad=False)

# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)
model.compile(
    loss='binary_crossentropy',
    optimizer=adam,
    metrics=[
        'accuracy',
        tf.keras.metrics.TruePositives(),
        tf.keras.metrics.FalsePositives(),
        tf.keras.metrics.TrueNegatives(),
        tf.keras.metrics.FalseNegatives(),
        tf.keras.metrics.AUC(),
    ],
)

# Must equal the batch_size that train_generator was created with (32); otherwise
# steps_per_epoch below no longer corresponds to one pass over the training images.
BATCH_SIZE = 32
NUM_TRAIN_IMAGES = train_df.shape[0]
# Train model on dataset
# NOTE(review): initial_epoch=2 with epochs=3 runs a single epoch (reported as
# "Epoch 3/3") on a model that was just rebuilt from ImageNet weights. This looks like a
# leftover from resuming an earlier run - load the saved weights first or set
# initial_epoch=0; confirm the intent.
model.fit_generator(train_generator, initial_epoch =2,epochs = 3,steps_per_epoch= NUM_TRAIN_IMAGES // BATCH_SIZE, verbose = 1)


# In[ ]:






Epoch 3/3


<keras.callbacks.callbacks.History at 0x7f0d874cfdd8>

In [52]:

# Persist the trained model in two files: weights (HDF5) and architecture (JSON).
model.save_weights('model_weights-BC_3.h5')

architecture_json = model.to_json()
with open('model_architecture-BC_3.json', 'w') as architecture_file:
    architecture_file.write(architecture_json)

print("Model weights and architecture saved.")


Model weights and architecture saved.


In [24]:
# Shows only the generator object's repr; no batch is produced by this cell.
train_generator

<generator object image_generator at 0x7efefb7da678>