--- a +++ b/modules/DEomics.py @@ -0,0 +1,44 @@ +from statsmodels.stats.multitest import multipletests +import sys +import pandas as pd +import numpy as np +from scipy.stats import ranksums +import argparse + +def de_omics(dataset,clinical,out_f,label="PH_EOSI_B_300"): + # filter out samples whose both omics data and clinical data are present + d123456 = np.intersect1d(dataset.index.astype(int), clinical[label].dropna().index) + p_label2 = clinical.loc[d123456][label] + df = dataset.loc[d123456] + g1 = p_label2==1.0 + g2 = p_label2==2.0 + li_t,li_p = [],[] + for target in df.columns: + t,p = ranksums(df.T.loc[target][g1], df.T.loc[target][g2]) + li_t.append(t) + li_p.append(p) + + li_p_adj = multipletests(li_p) + out = zip(df.columns, li_p_adj[1], li_t) + + with open(out_f,'w') as f: + for target,p,t in out: + f.write('%s\t%f\t%.5f\n'%(target,p,t)) + +if __name__=="__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--subgroup",dest='subgroup_label',required=True) + parser.add_argument("--clinical",required=True) + parser.add_argument("--omics",required=True) + parser.add_argument("--out","-o",dest='o',required=True) + args = parser.parse_args() + + # clinical data + clinical = pd.read_csv(args.clinical,index_col=0,sep="\t") + + # omics data + omics = pd.read_csv(args.omics,sep="\t",index_col=0).T + omics_data = omics.fillna(0) + omics_data.index = omics_data.index.astype(int) + + de_omics(omics_data, clinical, args.o, label=args.subgroup_label)