"""Extract the discharge-diagnosis text and one ICD label array from a frame.

Module path per the originating diff: src/ovr/mlmodel_data.py
"""
import numpy as np

from src.utils import labeltarget, preprocess

# Reference code lists handed to ``labeltarget``. Judging by the names these
# are the most frequent categories/codes in the dataset -- TODO confirm
# provenance; nothing in this module derives them.
frequent_icd9category = ['401', '427', '276', '414', '272', '250', '428', '518', '285', '584']
frequent_icd9code = ['4019', '4280', '42731', '41401', '5849', '25000', '2724', '51881', '5990', '53081']
frequent_icd10category = ['I10', 'I25', 'E78', 'I50', 'I48', 'N17', 'E87', 'E11', 'J96', 'N39']
frequent_icd10code = ['I10', 'I2510', 'I509', 'I4891', 'N179', 'E119', 'E784', 'E785', 'J9690', 'J9600']

# icdtype -> (DataFrame column holding the raw codes, matching reference list).
# Each target must be labelled against its OWN list; previously every target
# was labelled against ``frequent_icd9category``.
_LABEL_SOURCES = {
    'icd9cat': ('ICD9_CATEGORY', frequent_icd9category),
    'icd9code': ('ICD9_CODE', frequent_icd9code),
    'icd10cat': ('ICD10_CATEGORY', frequent_icd10category),
    'icd10code': ('ICD10', frequent_icd10code),
}


def mlmodel_data(df, icdtype):
    """Return the text inputs and the label array for one ICD target type.

    Args:
        df: Frame-like object indexable by column name whose columns expose
            ``.values``. Must contain ``'discharge_diagnosis'`` and the code
            column that corresponds to ``icdtype``.
        icdtype: One of ``'icd9cat'``, ``'icd9code'``, ``'icd10cat'`` or
            ``'icd10code'``.

    Returns:
        A tuple ``(X, y)`` where ``X`` is ``df['discharge_diagnosis'].values``
        and ``y`` is a NumPy array holding ``labeltarget(value, reference)``
        for every value of the selected code column.

    Raises:
        KeyError: If ``icdtype`` is not one of the four supported keys, or if
            a required column is missing from ``df``.
    """
    # Unknown icdtype raises KeyError, the same exception type as before.
    column, reference_codes = _LABEL_SOURCES[icdtype]

    X = df['discharge_diagnosis'].values
    # Only the requested target is built, so unrelated code columns no longer
    # need to be present in ``df``.
    # NOTE(review): ``labeltarget`` is defined in src.utils; its exact return
    # value is not visible from this module.
    y = np.array([labeltarget(raw, reference_codes) for raw in df[column].values])
    return X, y