|
a |
|
b/downstream_analysis/analyze_rna_protein.R |
|
|
1 |
## this script is used to plot correlation, q values, top features |
|
|
2 |
|
|
|
3 |
source('./publication_plot_theme.R') |
|
|
4 |
source('./utilities.R') |
|
|
5 |
version='covid' |
|
|
6 |
num_topic=100 |
|
|
7 |
########### read correlation data, GSEA q values data |
|
|
8 |
save_path=paste0('../plot/',version,'/') |
|
|
9 |
|
|
|
10 |
plot_corr=read.csv(paste0('../plot/',version,'/plot_correlation.csv')) |
|
|
11 |
q_rna=read.csv(paste0('../plot/',version,'/q_values_c7_rna.csv')) |
|
|
12 |
q_protein=read.csv(paste0('../plot/',version,'/q_values_c7_protein.csv')) |
|
|
13 |
|
|
|
14 |
corr_color=rep(NA,num_topic) |
|
|
15 |
corr_color[plot_corr$cor>=0]='red' |
|
|
16 |
corr_color[plot_corr$cor<0]='blue' |
|
|
17 |
corr_plot=data.frame(x=1:num_topic,y=plot_corr$cor,color=corr_color) |
|
|
18 |
|
|
|
19 |
q_rna_plot=data.frame(x=q_rna$x,y=-log(q_rna$y+exp(-20)),color=q_rna$colorr) |
|
|
20 |
q_rna_plot$label=ifelse(q_rna_plot$y>(-log(0.01)),q_rna_plot$x,'') |
|
|
21 |
|
|
|
22 |
q_protein_plot=data.frame(x=q_protein$x,y=-log(q_protein$y+exp(-20)),color=q_protein$colorr) |
|
|
23 |
q_protein_plot$label=ifelse(q_protein_plot$y>(-log(0.01)),q_protein_plot$x,'') |
|
|
24 |
|
|
|
25 |
###################### plot_corr |
|
|
26 |
save_name=paste0(save_path,'gene_protein_corr.png') |
|
|
27 |
plot_correlation(corr_plot,save_name) |
|
|
28 |
|
|
|
29 |
##################### plot q value for genes |
|
|
30 |
save_name=paste0(save_path,'gene_gsea_q_value_rna.png') |
|
|
31 |
plot_q_value(q_rna_plot,save_name) |
|
|
32 |
|
|
|
33 |
###################### plot q values for proteins |
|
|
34 |
save_name=paste0(save_path,'protein_gsea_q_value_atac.png') |
|
|
35 |
plot_q_value(q_protein_plot,save_name) |
|
|
36 |
|
|
|
37 |
###################### plot p values for differentially expressed topics |
|
|
38 |
topic_covid_p=read.csv(paste0('../plot/',version,'/topic_covid_p.csv')) |
|
|
39 |
topic_severity_p=read.csv(paste0('../plot/',version,'/topic_severity_p.csv')) |
|
|
40 |
## |
|
|
41 |
covid_p_plot=data.frame(topic=1:num_topic,p=topic_covid_p$Covid, |
|
|
42 |
color=ifelse(topic_covid_p$Covid<0.001,'#4DBBD5FF','#F39B7FFF')) |
|
|
43 |
covid_p_plot$label=ifelse(covid_p_plot$p<0.001,covid_p_plot$topic,'') |
|
|
44 |
save_name=paste0(save_path,'covid_topic_p_value.png') |
|
|
45 |
plot_topic_p_value(covid_p_plot,save_name) |
|
|
46 |
##severity |
|
|
47 |
severity_p_plot=data.frame(topic=1:num_topic, |
|
|
48 |
p=topic_severity_p$Critical, |
|
|
49 |
color=ifelse(topic_severity_p$Critical<0.001,'#4DBBD5FF','#F39B7FFF')) |
|
|
50 |
severity_p_plot$label=ifelse(severity_p_plot$p<0.001,severity_p_plot$topic,'') |
|
|
51 |
save_name=paste0(save_path,'critical_topic_p_value.png') |
|
|
52 |
plot_topic_p_value(severity_p_plot,save_name) |
|
|
53 |
|
|
|
54 |
####################### plot sample by topic heatmap |
|
|
55 |
type='rna_protein' |
|
|
56 |
is_plot=F |
|
|
57 |
is_diff_topic=T |
|
|
58 |
is_all=F |
|
|
59 |
topic_select='48,78,5,80,83,98,4,23,99,47,21,85,24,63,6,55,22,86,31,39,69,77,26,74,91,42,87' |
|
|
60 |
args=c(num_topic,version,type,topic_select,is_plot,is_diff_topic,is_all) |
|
|
61 |
system(paste(c('Rscript','./plot_sample_by_topic.R',args),collapse=' ')) |
|
|
62 |
|
|
|
63 |
|
|
|
64 |
####################### plot top features |
|
|
65 |
#read gene by topic matrix, and all gene names |
|
|
66 |
rna_topic=read.csv(paste0('../Result/',version,'/topic_rna.csv'),header=T) |
|
|
67 |
rna_names=rna_topic[,1] |
|
|
68 |
rna_topic=rna_topic[,-1] |
|
|
69 |
rownames(rna_topic)=rna_names |
|
|
70 |
colnames(rna_topic)=paste0('topic',1:num_topic) |
|
|
71 |
|
|
|
72 |
#read protein by topic matrix, and all protein names |
|
|
73 |
adt_topic=read.csv(paste0('../Result/',version,'/topic_protein.csv'),header=T) |
|
|
74 |
protein_names=adt_topic[,1] |
|
|
75 |
adt_topic=adt_topic[,-1] |
|
|
76 |
|
|
|
77 |
if (version=='covid'){ #remove AB_ in covid protein names. Optional data preparation process |
|
|
78 |
protein_names_new=c() |
|
|
79 |
for (i in protein_names){ |
|
|
80 |
protein_names_new=c(protein_names_new,paste0(strsplit(i,'AB_')[[1]][-1])) |
|
|
81 |
} |
|
|
82 |
rownames(adt_topic)=protein_names_new |
|
|
83 |
} else{ |
|
|
84 |
rownames(adt_topic)=protein_names |
|
|
85 |
} |
|
|
86 |
|
|
|
87 |
colnames(adt_topic)=paste0('topic',1:num_topic) |
|
|
88 |
|
|
|
89 |
#####select 10 example topics |
|
|
90 |
## order features |
|
|
91 |
gene_ids=get_feature_orders(rna_topic,num_topic=num_topic) |
|
|
92 |
protein_ids=get_feature_orders(adt_topic,num_topic=num_topic) |
|
|
93 |
|
|
|
94 |
|
|
|
95 |
topic_select=c(4,5,24,31,42,74,76,85,86,91) |
|
|
96 |
top_feature_num=5 |
|
|
97 |
save_path=paste0('../plot/',version,'/') |
|
|
98 |
|
|
|
99 |
plot_top_feature_in_selected_topic('gene',top_feature_num,gene_ids,protein_ids,rna_topic,adt_topic,topic_select,save_path,'select') |
|
|
100 |
plot_top_feature_in_selected_topic('protein',top_feature_num,gene_ids,protein_ids,rna_topic,adt_topic,topic_select,save_path,'select') |
|
|
101 |
|
|
|
102 |
|
|
|
103 |
|
|
|
104 |
#######check if top features are cell marker |
|
|
105 |
ref=read.table('../useful_file/Human_cell_markers.txt',sep='\t',header=T) |
|
|
106 |
#sort(unique(ref$cellName))#check match names first |
|
|
107 |
cell_name='CD8+ T cell'# |
|
|
108 |
|
|
|
109 |
a=ref[ref$cellName%in%cell_name,c(8,9,11)] |
|
|
110 |
markers=c() |
|
|
111 |
for (i in 1:dim(a)[1]){ |
|
|
112 |
for (j in 1:dim(a)[2]){ |
|
|
113 |
b=strsplit(a[i,j],', ')[[1]] |
|
|
114 |
markers=c(markers,b) |
|
|
115 |
} |
|
|
116 |
} |
|
|
117 |
markers=unique(markers) |
|
|
118 |
|
|
|
119 |
g=gene_ids[1:5,50]#topic num |
|
|
120 |
p=simu_gene_ids[1:5,50] |
|
|
121 |
## check if top features are biomarkers |
|
|
122 |
g %in% markers |
|
|
123 |
p %in% markers |
|
|
124 |
|
|
|
125 |
|