--- a +++ b/downstream_analysis/analyze_rna_protein.R @@ -0,0 +1,125 @@ +## this script is used to plot correlation, q values, top features + +source('./publication_plot_theme.R') +source('./utilities.R') +version='covid' +num_topic=100 +########### read correlation data, GSEA q values data +save_path=paste0('../plot/',version,'/') + +plot_corr=read.csv(paste0('../plot/',version,'/plot_correlation.csv')) +q_rna=read.csv(paste0('../plot/',version,'/q_values_c7_rna.csv')) +q_protein=read.csv(paste0('../plot/',version,'/q_values_c7_protein.csv')) + +corr_color=rep(NA,num_topic) +corr_color[plot_corr$cor>=0]='red' +corr_color[plot_corr$cor<0]='blue' +corr_plot=data.frame(x=1:num_topic,y=plot_corr$cor,color=corr_color) + +q_rna_plot=data.frame(x=q_rna$x,y=-log(q_rna$y+exp(-20)),color=q_rna$colorr) +q_rna_plot$label=ifelse(q_rna_plot$y>(-log(0.01)),q_rna_plot$x,'') + +q_protein_plot=data.frame(x=q_protein$x,y=-log(q_protein$y+exp(-20)),color=q_protein$colorr) +q_protein_plot$label=ifelse(q_protein_plot$y>(-log(0.01)),q_protein_plot$x,'') + +###################### plot_corr +save_name=paste0(save_path,'gene_protein_corr.png') +plot_correlation(corr_plot,save_name) + +##################### plot q value for genes +save_name=paste0(save_path,'gene_gsea_q_value_rna.png') +plot_q_value(q_rna_plot,save_name) + +###################### plot q values for proteins +save_name=paste0(save_path,'protein_gsea_q_value_atac.png') +plot_q_value(q_protein_plot,save_name) + +###################### plot p values for differentially expressed topics +topic_covid_p=read.csv(paste0('../plot/',version,'/topic_covid_p.csv')) +topic_severity_p=read.csv(paste0('../plot/',version,'/topic_severity_p.csv')) +## +covid_p_plot=data.frame(topic=1:num_topic,p=topic_covid_p$Covid, + color=ifelse(topic_covid_p$Covid<0.001,'#4DBBD5FF','#F39B7FFF')) +covid_p_plot$label=ifelse(covid_p_plot$p<0.001,covid_p_plot$topic,'') +save_name=paste0(save_path,'covid_topic_p_value.png') +plot_topic_p_value(covid_p_plot,save_name) +##severity +severity_p_plot=data.frame(topic=1:num_topic, + p=topic_severity_p$Critical, + color=ifelse(topic_severity_p$Critical<0.001,'#4DBBD5FF','#F39B7FFF')) +severity_p_plot$label=ifelse(severity_p_plot$p<0.001,severity_p_plot$topic,'') +save_name=paste0(save_path,'critical_topic_p_value.png') +plot_topic_p_value(severity_p_plot,save_name) + +####################### plot sample by topic heatmap +type='rna_protein' +is_plot=F +is_diff_topic=T +is_all=F +topic_select='48,78,5,80,83,98,4,23,99,47,21,85,24,63,6,55,22,86,31,39,69,77,26,74,91,42,87' +args=c(num_topic,version,type,topic_select,is_plot,is_diff_topic,is_all) +system(paste(c('Rscript','./plot_sample_by_topic.R',args),collapse=' ')) + + +####################### plot top features +#read gene by topic matrix, and all gene names +rna_topic=read.csv(paste0('../Result/',version,'/topic_rna.csv'),header=T) +rna_names=rna_topic[,1] +rna_topic=rna_topic[,-1] +rownames(rna_topic)=rna_names +colnames(rna_topic)=paste0('topic',1:num_topic) + +#read protein by topic matrix, and all protein names +adt_topic=read.csv(paste0('../Result/',version,'/topic_protein.csv'),header=T) +protein_names=adt_topic[,1] +adt_topic=adt_topic[,-1] + +if (version=='covid'){ #remove AB_ in covid protein names. Optional data preparation process + protein_names_new=c() + for (i in protein_names){ + protein_names_new=c(protein_names_new,paste0(strsplit(i,'AB_')[[1]][-1])) + } + rownames(adt_topic)=protein_names_new +} else{ + rownames(adt_topic)=protein_names +} + +colnames(adt_topic)=paste0('topic',1:num_topic) + +#####select 10 example topics +## order features +gene_ids=get_feature_orders(rna_topic,num_topic=num_topic) +protein_ids=get_feature_orders(adt_topic,num_topic=num_topic) + + +topic_select=c(4,5,24,31,42,74,76,85,86,91) +top_feature_num=5 +save_path=paste0('../plot/',version,'/') + +plot_top_feature_in_selected_topic('gene',top_feature_num,gene_ids,protein_ids,rna_topic,adt_topic,topic_select,save_path,'select') +plot_top_feature_in_selected_topic('protein',top_feature_num,gene_ids,protein_ids,rna_topic,adt_topic,topic_select,save_path,'select') + + + +#######check if top features are cell marker +ref=read.table('../useful_file/Human_cell_markers.txt',sep='\t',header=T) +#sort(unique(ref$cellName))#check match names first +cell_name='CD8+ T cell'# + +a=ref[ref$cellName%in%cell_name,c(8,9,11)] +markers=c() +for (i in 1:dim(a)[1]){ + for (j in 1:dim(a)[2]){ + b=strsplit(a[i,j],', ')[[1]] + markers=c(markers,b) + } +} +markers=unique(markers) + +g=gene_ids[1:5,50]#topic num +p=simu_gene_ids[1:5,50] +## check if top features are biomarkers +g %in% markers +p %in% markers + +