In [1]:
import os
import sys
import argparse
import tensorflow as tf
from tensorflow import keras
import pandas as pd
from data_loader import read_trainset, DataGenerator
import parse_config

In [6]:
# Configure a TF1-compat session with `allow_growth` enabled on its GPU
# options; only applied when parse_config.USING_RTX_20XX is truthy.
# (Original note: comment out if using tensorflow 2.x.)
if parse_config.USING_RTX_20XX:
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.compat.v1.Session(config=config)
    tf.compat.v1.keras.backend.set_session(session)

In [65]:
# Inference settings used by the cells below.
MODEL_NAME = '../models/epoch3.hdf5'  # checkpoint passed to keras.models.load_model
img_size = (256, 256, 3)              # forwarded to DataGenerator as img_size
batch_size = 16                       # forwarded to DataGenerator as batch_size

# NOTE(review): absolute, machine-specific path -- adjust for your environment.
test_images_dir = '/media/keil/baltar/intracranial-hemorrhage-detection-data/stage_2_test_images/'
# Sample submission CSV; read_testset turns it into the frame used for inference.
testset_filename = "../submissions/stage_2_sample_submission.csv"

In [9]:
def read_testset(filename):
    """Load the sample-submission CSV and pivot it to one row per image.

    Each ``ID`` value is split by fixed position: the first 12 characters
    become ``Image`` and everything from position 13 onward becomes
    ``Diagnosis`` (the character at position 12 is discarded).

    Args:
        filename (str): Path of the sample submission CSV. It must provide
            the ``ID`` and ``Label`` columns.

    Returns:
        pandas.DataFrame: Frame indexed by ``Image`` whose columns are a
        two-level index of ``Label`` over each ``Diagnosis`` value.
    """
    submission = pd.read_csv(filename)

    # Fixed-width split of the ID into its image and diagnosis parts.
    ids = submission["ID"]
    submission["Image"] = ids.str[:12]
    submission["Diagnosis"] = ids.str[13:]

    # Keep only what the pivot needs, then move Diagnosis into the columns.
    indexed = submission[["Label", "Diagnosis", "Image"]].set_index(["Image", "Diagnosis"])
    return indexed.unstack(level=-1)

def create_submission(model, data, test_df,
                      out_path='../submissions/stage2-final-submission-v2.csv'):
    """Predict on ``data`` and write the result as a long-format submission CSV.

    Args:
        model: Object exposing ``predict_generator(data, verbose=...)`` or,
            when that method is absent, ``predict(data, verbose=...)``.
        data: Batch source handed unchanged to the model.
        test_df (pandas.DataFrame): Wide frame (as produced by
            ``read_testset``) whose index and columns label the rows and
            columns of the prediction matrix.
        out_path (str): Destination CSV. Defaults to the path this function
            previously had hard-coded.

    Returns:
        pandas.DataFrame: The frame that was written, with an ``ID`` column
        (``<Image>_<Diagnosis>``) followed by the stacked prediction column.

    Raises:
        ValueError: If the model returns fewer rows than ``test_df`` has.
    """
    print('+'*50)
    print("Creating predictions on test dataset")

    # Keep using predict_generator where it exists; fall back to predict on
    # models that do not provide it.
    predict_fn = getattr(model, "predict_generator", None)
    if predict_fn is None:
        predict_fn = model.predict
    pred = predict_fn(data, verbose=1)

    n_expected = len(test_df)
    n_pred = len(pred)
    if n_pred < n_expected:
        raise ValueError(
            "Model returned {} prediction rows but test_df has {} rows".format(
                n_pred, n_expected))
    if n_pred > n_expected:
        # When the batch size does not divide the number of images, the
        # generator yields only full batches and the prediction array ends up
        # longer than test_df, which made the DataFrame constructor fail.
        # NOTE(review): assumes the surplus rows are padding at the tail of
        # the final batch -- confirm against DataGenerator.
        print("Dropping {} surplus prediction rows from the final batch".format(
            n_pred - n_expected))
        pred = pred[:n_expected]

    out_df = pd.DataFrame(pred, index=test_df.index, columns=test_df.columns)

    # Wide -> long: one row per (Image, Diagnosis) pair, then rebuild the ID.
    submission = out_df.stack().reset_index()
    submission.insert(
        loc=0,
        column='ID',
        value=submission['Image'].astype(str) + "_" + submission['Diagnosis'],
    )
    submission = submission.drop(["Image", "Diagnosis"], axis=1)

    print("Saving submissions to {}".format(out_path))
    submission.to_csv(out_path, index=False)

    return submission

In [66]:
# Wide sample-submission frame produced by read_testset.
test_df = read_testset(testset_filename)

# Batch source over the test images, keyed by the image IDs in test_df's index.
test_generator = DataGenerator(
    list_IDs=test_df.index,
    batch_size=batch_size,
    img_size=img_size,
    img_dir=test_images_dir,
)

# Load the saved model without compiling it; this notebook only runs prediction.
best_model = keras.models.load_model(MODEL_NAME, compile=False)

In [67]:
# test_df shape: (121232, 6) -- 121232 files in stage_2_test_images/
# (counted with `ls -1 stage_2_test_images/ | wc -l`).
# Chained comparison so that all three lengths must agree. The previous form,
# len(test_df == len(test_generator.list_IDs)), compared the frame element-wise
# with an int and took the length of that boolean frame, so list_IDs was never
# actually checked.
assert len(test_generator.indices) == len(test_df) == len(test_generator.list_IDs)


test_df.head()

Unnamed: 0_level_0,Label,Label,Label,Label,Label,Label
Diagnosis,any,epidural,intraparenchymal,intraventricular,subarachnoid,subdural
Image,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
ID_000000e27,0.5,0.5,0.5,0.5,0.5,0.5
ID_000009146,0.5,0.5,0.5,0.5,0.5,0.5
ID_00007b8cb,0.5,0.5,0.5,0.5,0.5,0.5
ID_000134952,0.5,0.5,0.5,0.5,0.5,0.5
ID_000176f2a,0.5,0.5,0.5,0.5,0.5,0.5


What is going on: the image count of the stage 2 test set (121232) is not evenly divisible by the batch size of 20. 121232 / 20 leaves the last batch 8 images short of a full batch, so the predictions come back as 6062 full batches = 121240 rows, which is the size I was seeing. Confirming now by using a batch size of 16, which divides 121232 evenly... will confirm again via batch size = 1.

In [68]:
# Step through create_submission(best_model, test_generator, test_df) by hand,
# starting with the prediction call, so the raw array can be inspected.
pred_batch16 = best_model.predict_generator(
    test_generator,
    verbose=1,
)



In [69]:
pred_batch16.shape # with batch_size=16 the row count matches the 121232 rows of test_df

(121232, 6)

In [71]:
# Reshape the raw predictions into the submission layout and save them.
# NOTE: this cell rebinds test_df from the wide frame to the long submission
# frame, so re-running it without first re-running the loading cell will not
# reproduce the same result.

# Label the prediction matrix with the wide frame's index and columns.
out_df = pd.DataFrame(pred_batch16, index=test_df.index, columns=test_df.columns)

# Wide -> long: one row per (Image, Diagnosis) pair.
long_df = out_df.stack().reset_index()

# Rebuild the submission ID as "<Image>_<Diagnosis>".
id_column = long_df['Image'].astype(str) + "_" + long_df['Diagnosis']

# Drop the helper columns, then put ID first.
test_df = long_df.drop(["Image", "Diagnosis"], axis=1)
test_df.insert(loc=0, column='ID', value=id_column)

test_df.to_csv('../submissions/stage2-final-submission-v2.csv', index=False)


In [70]:
# NOTE(review): `pred` is not assigned by any cell visible here (inside
# create_submission it is only a local). Presumably it is left over from the
# earlier batch_size=20 run discussed above -- confirm; on a fresh kernel this
# raises NameError unless another cell defines it.
pred.shape

(121240, 6)

In [59]:
# Dump the raw prediction matrix (default integer row/column labels) to a
# scratch CSV for inspection.
temp_df = pd.DataFrame(data=pred)
temp_df.to_csv(path_or_buf='./temp_csv.csv')

In [58]:
# Peek at the first rows of the raw predictions; columns are unlabeled here.
temp_df.head()


Unnamed: 0,0,1,2,3,4,5
0,0.117452,0.000942,0.067592,0.000453,0.052313,0.011529
1,0.001256,1e-05,0.000121,0.000128,0.00044,0.000986
2,0.002467,0.000215,0.003454,0.000158,0.000787,0.001039
3,0.002803,9.1e-05,0.000339,4.2e-05,0.001047,0.001354
4,0.002144,4.6e-05,0.000286,0.000154,0.000292,0.002259
