Intracranial_Hemorrhage / Git / [2bae97] /SequenceModel/check

Models:
DavidFeaster/
Intracranial_Hemorrhage
Downloads: 1
[2bae97]: / SequenceModel / check_oof.py
History
Download this file
161 lines (133 with data), 5.8 kB

import os
import numpy as np
import pandas as pd
import os
from settings import *

def _write_pivoted_labels(source_csv, target_csv):
    """Pivot a long-format label CSV into one row per image and save it.

    The source file is read for its ``ID`` and ``Label`` columns. Each ``ID``
    is split on ``'_'``: the second field (prefixed with ``"ID_"``) becomes the
    ``filename`` and the third field becomes the ``type``. Duplicate rows are
    dropped before pivoting because ``pivot`` fails on repeated
    (filename, type) pairs.

    Args:
        source_csv: path of the long-format CSV to read.
        target_csv: path the wide-format CSV is written to (without index).
    """
    long_df = pd.read_csv(source_csv)
    long_df['filename'] = long_df['ID'].apply(lambda st: "ID_" + st.split('_')[1])
    long_df['type'] = long_df['ID'].apply(lambda st: st.split('_')[2])
    wide_df = (
        long_df[['Label', 'filename', 'type']]
        .drop_duplicates()
        .pivot(index='filename', columns='type', values='Label')
        .reset_index()
    )
    wide_df.to_csv(target_csv, index=False)


# Build the wide-format tables only when they are not cached on disk yet.
if not os.path.exists('./csv/standard_test.csv'):
    _write_pivoted_labels('./csv/stage_2_sample_submission.csv', r'./csv/standard_test.csv')

# The guard previously tested the misspelled './csv/standrad.csv', which never
# matched the file actually written, so the table was regenerated on every run.
if not os.path.exists('./csv/standard.csv'):
    _write_pivoted_labels('./csv/stage_1_train.csv', r'./csv/standard.csv')

# Load the wide-format train table and normalise its filenames by stripping
# any '.dcm' / '.png' extension text, so they can be used as plain image ids.
train = r'./csv/standard.csv'
train_df = pd.read_csv(train)
train_df["filename"] = [
    name.replace('.dcm', '').replace('.png', '') for name in train_df["filename"]
]
train_ids = train_df['filename']
train_num = len(train_ids)

# Same treatment for the test table.
test = r'./csv/standard_test.csv'
test_df = pd.read_csv(test)
test_df["filename"] = [
    name.replace('.dcm', '').replace('.png', '') for name in test_df["filename"]
]
test_ids = test_df['filename']
test_num = len(test_ids)

def get_train_dict():
    """Return a dict mapping each entry of ``train_ids`` to its position.

    Positions start at 0 and follow iteration order of the module-level
    ``train_ids``; if an id occurs more than once, its last position wins.
    """
    return {image_id: position for position, image_id in enumerate(train_ids)}

def get_test_dict():
    """Return a dict mapping each entry of ``test_ids`` to its position.

    Positions start at 0 and follow iteration order of the module-level
    ``test_ids``; if an id occurs more than once, its last position wins.
    """
    return {image_id: position for position, image_id in enumerate(test_ids)}
#==========================================================================================================
if 1:
    def get_predict(df):
        """Stack the six ``<type>_y`` columns of *df* into one 2-D array.

        Columns are read in the fixed order any, epidural, intraparenchymal,
        intraventricular, subarachnoid, subdural, regardless of their order in
        *df*. Each column becomes one column of the result, so the returned
        array has one row per row of *df* and six columns.
        """
        hemorrhage_types = ('any', 'epidural', 'intraparenchymal',
                            'intraventricular', 'subarachnoid', 'subdural')
        columns = [
            np.asarray(list(df[name + '_y'])).reshape([-1, 1])
            for name in hemorrhage_types
        ]
        return np.concatenate(columns, axis=1)

    def _merge_reference_with_probs(reference_csv, candidate_csvs, debug_csv):
        """Left-join a reference table with the first existing probability CSV.

        The reference CSV is always read first. The candidates are then probed
        in order; when none exists, None is returned. Otherwise the chosen path
        is printed, filenames on both sides are stripped of extension text
        ('.dcm' on the reference; '.png' and '.dcm' on the probabilities), the
        frames are merged on ``filename`` and the result is dumped to
        *debug_csv* before being returned.
        """
        reference_df = pd.read_csv(reference_csv)

        prob_path = next((path for path in candidate_csvs if os.path.exists(path)), None)
        if prob_path is None:
            return None

        print(prob_path)
        prob_df = pd.read_csv(prob_path)
        reference_df["filename"] = [name.replace('.dcm', '') for name in reference_df["filename"]]
        prob_df["filename"] = [
            name.replace('.png', '').replace('.dcm', '') for name in prob_df["filename"]
        ]

        merged = pd.merge(reference_df, prob_df, how='left', on='filename')
        merged.to_csv(debug_csv)
        return merged

    def get_train_test_predict(dir):
        """Load one model's validation and test probabilities.

        *dir* is the model's output directory; its last path component is used
        as the model name when building the CSV file names. The probabilities
        are aligned to the row order of ``./csv/standard.csv`` (validation) and
        ``./csv/standard_test.csv`` (test) through a left merge on ``filename``.
        ``get_predict`` then reads the ``<type>_y`` columns of each merge
        (presumably the suffix pandas gives the probability columns because
        they overlap the label columns; confirm against the CSV schema).

        Returns:
            ``(None, None)`` when no validation CSV exists for the model;
            otherwise ``(val_predictions, test_predictions)``. When no test CSV
            exists, the second element is a zero array of shape
            ``[test_num, 6, 1]`` and ' test None' is printed.
        """
        model_name = os.path.split(dir)[1]

        val_merged = _merge_reference_with_probs(
            r'./csv/standard.csv',
            [
                os.path.join(dir, model_name + '_val_prob_TTA_stage2_finetune.csv'),
                os.path.join(dir, model_name + '_val_prob_TTA.csv'),
            ],
            os.path.join(dir, 'DEBUG_' + model_name + '_val_stage2_sample.csv'),
        )
        if val_merged is None:
            return None, None
        predict = get_predict(val_merged)

        test_merged = _merge_reference_with_probs(
            r'./csv/standard_test.csv',
            [
                os.path.join(dir, model_name + '_test_prob_TTA_stage2_finetune.csv'),
                os.path.join(dir, model_name + '_test_prob_TTA_stage2.csv'),
            ],
            os.path.join(dir, 'DEBUG_' + model_name + '_test_stage2_sample.csv'),
        )
        if test_merged is None:
            print(' test None')
            return predict, np.zeros([test_num, 6, 1])

        predict_test = get_predict(test_merged)
        print(predict_test.shape)
        return predict, predict_test

    # Gather per-model validation and test predictions from every entry found
    # under <feature_path>/stage2_finetune. Entries for which the loader
    # returns None are skipped.
    # NOTE(review): os.listdir returns entries in arbitrary order, so the model
    # order (and hence the feature order built later) follows the filesystem.
    train_predicts, test_predicts = [], []
    _stage2_root = os.path.join(feature_path, r'stage2_finetune')

    for model_name in os.listdir(_stage2_root):
        print(model_name)
        val_fea, test_fea = get_train_test_predict(dir=os.path.join(_stage2_root, model_name))
        if val_fea is not None:
            train_predicts.append(val_fea)
        if test_fea is not None:
            test_predicts.append(test_fea)

if 1:
    # Build the ground-truth label matrix: one row per row of standard.csv and
    # one column per hemorrhage type, in the order listed in `types`.
    types = ['any', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural']
    # weight / loss / index are not used in the visible part of this file; they
    # are kept so that any code importing them from this module keeps working.
    weight = [2.0, 1.0, 1.0, 1.0, 1.0, 1.0]
    loss = 0
    index = 0
    merge_csv = pd.read_csv(r'./csv/standard.csv')
    # A comprehension is used instead of a module-level `for w, type in ...`
    # loop, which rebound the builtin `type` (and leaked `w`) into the module
    # namespace for everything executed or imported afterwards.
    label_list = [
        np.asarray(list(merge_csv[hemorrhage_type])).reshape([-1, 1])
        for hemorrhage_type in types
    ]
    label = np.concatenate(label_list, axis=1)

# Give every model's prediction matrix a trailing singleton axis so that the
# per-model arrays can later be joined along axis 2. zip() pairs validation
# and test predictions and stops at the shorter of the two lists.
X_list, X_test_list = [], []
for model, model_test in zip(train_predicts, test_predicts):
    model, model_test = (
        model.reshape((train_num, 6, 1)),
        model_test.reshape((test_num, 6, 1)),
    )
    X_list.append(model)
    X_test_list.append(model_test)

def move(lst, k):
    """Return *lst* rotated left by *k*: ``lst[k:]`` followed by ``lst[:k]``.

    The input is not modified. Slicing semantics apply, so a *k* beyond the
    length leaves the order unchanged and a negative *k* rotates right.
    """
    head, tail = lst[:k], lst[k:]
    return tail + head

def get_X(x_list):
    """Expand a list of per-model arrays into a longer list of feature arrays.

    The result contains, in order:
      1. the element-wise mean over all arrays in *x_list*;
      2. for each array, its difference with the next one in the list (the
         last array is paired with the first);
      3. the original arrays themselves (same objects, not copies).

    *x_list* itself is left unmodified.
    """
    feature_mean = np.mean(x_list, axis=0)
    successors = move(x_list, 1)
    neighbour_diffs = [current - following for current, following in zip(x_list, successors)]
    return [feature_mean] + neighbour_diffs + list(x_list)

# Without any model predictions get_X() yields a bare NaN mean and
# np.concatenate then fails with an unrelated-looking "zero-dimensional
# arrays cannot be concatenated" ValueError; fail early with a clear message
# of the same exception type instead.
if not X_list or not X_test_list:
    raise ValueError(
        'No model predictions were loaded; cannot build the stacked feature arrays.'
    )

# Expand the per-model arrays (mean, neighbour differences, raw predictions)
# and join them along the last axis into one feature tensor per split.
X_list = get_X(X_list)
X_test_list = get_X(X_test_list)
X = np.concatenate(X_list, axis=2)
X_test = np.concatenate(X_test_list, axis=2)
# NOTE: measured after get_X(), so this counts the expanded feature arrays,
# not the number of models that were loaded.
model_num = len(X_list)

y = label