a b/downstream_analysis/analyze_rna_protein.R
1
## this script is used to plot correlation, q values, top features
2
3
source('./publication_plot_theme.R')
4
source('./utilities.R')
5
version='covid'
6
num_topic=100
7
########### read correlation data, GSEA q values data
8
save_path=paste0('../plot/',version,'/')
9
10
plot_corr=read.csv(paste0('../plot/',version,'/plot_correlation.csv'))
11
q_rna=read.csv(paste0('../plot/',version,'/q_values_c7_rna.csv'))
12
q_protein=read.csv(paste0('../plot/',version,'/q_values_c7_protein.csv'))
13
14
corr_color=rep(NA,num_topic)
15
corr_color[plot_corr$cor>=0]='red'
16
corr_color[plot_corr$cor<0]='blue'
17
corr_plot=data.frame(x=1:num_topic,y=plot_corr$cor,color=corr_color)
18
19
q_rna_plot=data.frame(x=q_rna$x,y=-log(q_rna$y+exp(-20)),color=q_rna$colorr)
20
q_rna_plot$label=ifelse(q_rna_plot$y>(-log(0.01)),q_rna_plot$x,'')
21
22
q_protein_plot=data.frame(x=q_protein$x,y=-log(q_protein$y+exp(-20)),color=q_protein$colorr)
23
q_protein_plot$label=ifelse(q_protein_plot$y>(-log(0.01)),q_protein_plot$x,'')
24
25
###################### plot_corr
26
save_name=paste0(save_path,'gene_protein_corr.png')
27
plot_correlation(corr_plot,save_name)
28
29
#####################  plot q value for genes
30
save_name=paste0(save_path,'gene_gsea_q_value_rna.png')
31
plot_q_value(q_rna_plot,save_name)
32
33
######################  plot q values for proteins
34
save_name=paste0(save_path,'protein_gsea_q_value_atac.png')
35
plot_q_value(q_protein_plot,save_name)
36
37
######################  plot p values for differentially expressed topics
38
topic_covid_p=read.csv(paste0('../plot/',version,'/topic_covid_p.csv'))
39
topic_severity_p=read.csv(paste0('../plot/',version,'/topic_severity_p.csv'))
40
##
41
covid_p_plot=data.frame(topic=1:num_topic,p=topic_covid_p$Covid,
42
                        color=ifelse(topic_covid_p$Covid<0.001,'#4DBBD5FF','#F39B7FFF'))
43
covid_p_plot$label=ifelse(covid_p_plot$p<0.001,covid_p_plot$topic,'')
44
save_name=paste0(save_path,'covid_topic_p_value.png')
45
plot_topic_p_value(covid_p_plot,save_name)
46
##severity
47
severity_p_plot=data.frame(topic=1:num_topic,
48
                           p=topic_severity_p$Critical,
49
                           color=ifelse(topic_severity_p$Critical<0.001,'#4DBBD5FF','#F39B7FFF'))
50
severity_p_plot$label=ifelse(severity_p_plot$p<0.001,severity_p_plot$topic,'')
51
save_name=paste0(save_path,'critical_topic_p_value.png')
52
plot_topic_p_value(severity_p_plot,save_name)
53
54
#######################  plot sample by topic heatmap
55
type='rna_protein'
56
is_plot=F
57
is_diff_topic=T
58
is_all=F
59
topic_select='48,78,5,80,83,98,4,23,99,47,21,85,24,63,6,55,22,86,31,39,69,77,26,74,91,42,87'
60
args=c(num_topic,version,type,topic_select,is_plot,is_diff_topic,is_all)
61
system(paste(c('Rscript','./plot_sample_by_topic.R',args),collapse=' '))
62
63
64
#######################  plot top features
65
#read gene by topic matrix, and all gene names
66
rna_topic=read.csv(paste0('../Result/',version,'/topic_rna.csv'),header=T)
67
rna_names=rna_topic[,1]
68
rna_topic=rna_topic[,-1]
69
rownames(rna_topic)=rna_names
70
colnames(rna_topic)=paste0('topic',1:num_topic)
71
72
#read protein by topic matrix, and all protein names
73
adt_topic=read.csv(paste0('../Result/',version,'/topic_protein.csv'),header=T)
74
protein_names=adt_topic[,1]
75
adt_topic=adt_topic[,-1]
76
77
if (version=='covid'){  #remove AB_ in covid protein names. Optional data preparation process
78
  protein_names_new=c()
79
  for (i in protein_names){
80
    protein_names_new=c(protein_names_new,paste0(strsplit(i,'AB_')[[1]][-1]))
81
  }
82
  rownames(adt_topic)=protein_names_new
83
} else{
84
  rownames(adt_topic)=protein_names
85
}
86
87
colnames(adt_topic)=paste0('topic',1:num_topic)
88
89
#####select 10 example topics
90
## order features
91
gene_ids=get_feature_orders(rna_topic,num_topic=num_topic)
92
protein_ids=get_feature_orders(adt_topic,num_topic=num_topic)
93
94
95
topic_select=c(4,5,24,31,42,74,76,85,86,91)
96
top_feature_num=5
97
save_path=paste0('../plot/',version,'/')
98
99
plot_top_feature_in_selected_topic('gene',top_feature_num,gene_ids,protein_ids,rna_topic,adt_topic,topic_select,save_path,'select')
100
plot_top_feature_in_selected_topic('protein',top_feature_num,gene_ids,protein_ids,rna_topic,adt_topic,topic_select,save_path,'select')
101
102
103
104
#######check if top features are cell marker
105
ref=read.table('../useful_file/Human_cell_markers.txt',sep='\t',header=T)
106
#sort(unique(ref$cellName))#check match names first
107
cell_name='CD8+ T cell'#
108
109
a=ref[ref$cellName%in%cell_name,c(8,9,11)]
110
markers=c()
111
for (i in 1:dim(a)[1]){
112
  for (j in 1:dim(a)[2]){
113
    b=strsplit(a[i,j],', ')[[1]]
114
    markers=c(markers,b)
115
  }
116
}
117
markers=unique(markers)
118
119
g=gene_ids[1:5,50]#topic num
120
p=simu_gene_ids[1:5,50]
121
## check if top features are biomarkers
122
g %in% markers
123
p %in% markers
124
125