Switch to unified view

a b/Fig2_microenvironment_analysis.R
1
GIT_HOME="/research/users/ppolonen/git_home/ImmunogenomicLandscape-BloodCancers/"
2
source(file.path(GIT_HOME, "common_scripts/featurematrix/functions_generate_fm.R"))
3
source(file.path(GIT_HOME, "common_scripts/visualisation/plotting_functions.R"))
4
source(file.path(GIT_HOME, "common_scripts/statistics/functions_statistics.R"))
5
source(file.path(GIT_HOME, "common_scripts/statistics/statistics_wrappers.R"))
6
library(parallel)
7
library(ggplot2)
8
library(reshape2)
9
library(RColorBrewer)
10
library(ggrepel)
11
library(gridExtra)
12
library(survcomp)
13
14
setwd("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/Published_data_figures")
15
16
# annotations
17
annot=get(load("Hemap_immunology_Annotations.Rdata"))
18
19
# data
20
load("data9544_with_gene_symbols.RData")
21
data = t(data[rownames(data)%in%annot$GSM.identifier..sample.,])
22
profile=data.matrix(get(load("mixtureM_profile.Rdata")))
23
profile[profile==-1] = 0
24
profile=profile[,colnames(profile)%in%colnames(data)]
25
26
# geometric mean:
27
gm=annot$CytolyticScore
28
29
# logical vectors for all diseases:
30
subclass2=get.logical(annovector = list(annot$subclasses), filterv = annot$CELLS_SORTED==0&annot$Sample.type%in%c("Prolif", "Cancer"))
31
tbly2=get.logical(annovector = list(annot$tbLY), filterv = annot$CELLS_SORTED==0&annot$Sample.type%in%c("Prolif", "Cancer"))
32
tbly2=tbly2[!names(tbly2)%in%"Lymphoma_BCL"]
33
34
list_cancers=lapply(c(subclass2, tbly2), list)
35
list_cancers=unlist(list_cancers, recursive=F)
36
37
l=c("BCL_DLBCL", "Lymphoma_BCL_CHL", "Lymphoma_BCL_MCL", "Lymphoma_BCL_FL","CLL", "CML", "AML","Prolif_Myeloproliferative_MDS", "pre-B-ALL",  "T-ALL")
38
logicalvectors=list_cancers[match(l, names(list_cancers))]
39
names(logicalvectors)=gsub("^BCL_|Lymphoma_BCL_|Prolif_Myeloproliferative_", "", names(logicalvectors))
40
41
#********************************** compute summary statistics: **********************************
42
43
# Then compute cor, cor.test and TNK FC for same cancers:
44
res=do.call(cbind, mclapply(seq(logicalvectors), function(i){
45
  logv=logicalvectors[[i]]
46
  NAME=names(logicalvectors)[i]
47
  cors=cor(t(data[,logv]), as.numeric(gm)[logv], method = "spearman", use = "complete.obs")
48
}, mc.cores=10))
49
50
NK_log=grepl("CD8|NaturalKiller", annot[,5])
51
52
fc_res=do.call(cbind, mclapply(seq(logicalvectors), function(i){
53
  logv=logicalvectors[[i]]
54
  NAME=names(logicalvectors)[i]
55
56
  FC=apply(data[,], 1, function(v){
57
    mean(v[NK_log])-mean(v[logv])
58
  })
59
60
}, mc.cores=10))
61
62
res_pval=do.call(cbind, mclapply(seq(logicalvectors), function(i){
63
  logv=logicalvectors[[i]]
64
  NAME=names(logicalvectors)[i]
65
  pvals=-log10(apply(data[,logv,drop=F], 1, cor.test.p, as.numeric(gm)[logv]))
66
  pvals[!is.finite(pvals)]=224
67
  return(pvals)
68
}, mc.cores=10))
69
70
# these matrices can be used for plotting:
71
colnames(res)=names(logicalvectors)
72
colnames(fc_res)=names(logicalvectors)
73
colnames(res_pval)=names(logicalvectors)
74
75
# sorted samples, see if genes are expressed in cancer
76
subclass2=get.logical(annovector = list(annot$subclasses), filterv = annot$CELLS_SORTED==1&annot$Sample.type%in%c("Prolif", "Cancer"))
77
78
# diseases used minus FL with 7 samples
79
l=c("AML", "BCL", "CLL", "CML", "pre-B-ALL", "Prolif_Myeloproliferative_MDS", "T-ALL")
80
logicalvectors.sorted=subclass2[names(subclass2)%in%l]
81
names(logicalvectors.sorted)=gsub("^BCL_|Lymphoma_BCL_|Prolif_Myeloproliferative_", "", names(logicalvectors.sorted))
82
83
pval.sorted=fisher.matrix.lv(logicalVector.list = logicalvectors.sorted, data = profile, to.log10 = T, adjust.method = "BH", fisher.alternative = "greater")
84
pval.sorted=pval.sorted[match(rownames(res), rownames(pval.sorted)),]
85
86
# All tests
87
save(list = c("pval.sorted", "fc_res", "res", "res_pval", "pval.sorted"), file = "Hemap_microenvironment_summary_statistics.Rdata")
88
89
#*****************************************************************************************************
90
91
# load from above R object:
92
load("Hemap_microenvironment_summary_statistics.Rdata")
93
94
res_all=do.call(rbind, mclapply(seq(logicalvectors), function(i){
95
  logv=logicalvectors[[i]]
96
  NAME=names(logicalvectors)[i]
97
  
98
  # correlate genes with cytolytic activity
99
  pvals=apply(data[,logv,drop=F], 1, cor.test.p, as.numeric(gm)[logv])
100
  FC=fc_res[match(names(pvals), rownames(fc_res)),NAME]
101
  cors=res[match(names(pvals), rownames(res)),NAME]
102
  
103
  mean.exp=apply(data[,logv,drop=F], 1, median)
104
  
105
  data_cor=data.frame("gene"=names(cors), "Rho"=as.numeric(cors), "pval"=pvals, "adj.pval"=p.adjust(pvals), "FCtoTNK"=as.numeric(FC), "disease"=NAME, "mean.expression"=mean.exp, stringsAsFactors = F)
106
  data_cor=data_cor[order(data_cor[,2], decreasing = T),]
107
  
108
  if(!dim(data_cor)[1])return(NULL)
109
  
110
  # significant
111
  data_cor$significant=data_cor$adj.pval<0.01&abs(data_cor$FCtoTNK)>2&abs(data_cor$Rho)>0.2
112
  
113
  # add category:
114
  data_cor$category=""
115
  data_cor$category[data_cor$FCtoTNK>0&data_cor$Rho>0&data_cor$significant]="CTL/NK gene"
116
  data_cor$category[data_cor$FCtoTNK<0&data_cor$Rho>0&data_cor$significant]="Stromal/cancer gene (Rho > 0)"
117
  data_cor$category[data_cor$FCtoTNK<0&data_cor$Rho<0&data_cor$significant]="Stromal/cancer gene (Rho < 0)"
118
  
119
  return(data_cor)
120
}, mc.cores=10))
121
122
res_all.filt=res_all[res_all$significant,]
123
124
res_all.filt=res_all.filt[order(res_all.filt$category, abs(res_all.filt$Rho), abs(res_all.filt$FCtoTNK), -res_all.filt$adj.pval, decreasing = T),]
125
126
write.table(res_all[res_all$gene%in%res_all.filt$gene,], "Hemap_cytolytic_correlated_genes_TableS2.txt", col.names=T,row.names=F, quote = F, sep="\t")
127
128
for(i in 2:5)res_all.filt[,i]=prettyNum(signif(res_all.filt[,i], 2))
129
130
write.table(res_all.filt, "TableS2.txt", col.names=T,row.names=F, quote = F, sep="\t")
131
save(res_all.filt, file="Hemap_cytolytic_correlated_genes_TableS2_onlysignif.Rdata")
132
133
#***************************************************************************************************************************
134
fun.plot=function(dat, main, xlab, ylab, aval=2, bval=0.2,cval=0.01,genelist=NULL, MAX=25, SIZE=3, T.SIZE=8, height=7.5, width=6){
135
136
  if(!is.null(genelist)){
137
    dat$significant=dat$gene%in%genelist
138
  }else{
139
    dat$significant=dat$adj.pval<cval&abs(dat$Rho)>bval&abs(dat$FCtoTNK)>aval
140
  }
141
  
142
  groups=unique(dat$category)
143
  dat$plot=F
144
  
145
  for(g in groups){
146
    a2=dat$FCtoTNK[dat$category%in%g]
147
    b2=dat$Rho[dat$category%in%g]
148
    gene=dat$gene[dat$category%in%g]
149
    test=tail(gene[order(abs(a2^2*b2^2))], MAX)
150
    dat$plot[dat$gene%in%test&dat$category%in%g&dat$significant]=T
151
  }
152
  
153
  dat=dat[order(dat$plot),]
154
  
155
  dat$size.point=ifelse(dat$plot, 2.4, 0.6)
156
  
157
  myColors <- data.frame(c("grey85","#E41A1C", "grey85","#377EB8"), c("", "CTL/NK gene", "Stromal/cancer gene (Rho < 0)", "Stromal/cancer gene (Rho > 0)"), stringsAsFactors = F)
158
  myColors=myColors[match(unique(dat$category), myColors[,2]),1]
159
  names(myColors) <- unique(dat$category)
160
  
161
  colScale <- scale_colour_manual(name = "category", values = myColors)
162
  
163
  library(ggrastr)
164
  
165
  p=ggplot(dat, aes(FCtoTNK, Rho, colour = category)) +
166
    geom_point_rast(aes(size = dat$size.point), raster.width = width, raster.height = height) +
167
    scale_size_continuous(range = c(0.6, 2.5))+
168
    colScale +
169
    theme_classic(base_size = T.SIZE) +
170
    ggtitle(main) +
171
    labs(x = xlab, y = ylab, fill = "") +
172
    scale_y_continuous(breaks = pretty(dat$Rho, n = 6))+
173
    
174
    geom_text_repel(
175
      data = subset(dat, dat$plot),
176
      aes(label = subset(dat, dat$plot)$gene),
177
      size = SIZE,
178
      color="black",
179
      force=1.5,
180
      segment.size = 0.1,
181
      box.padding = unit(0.01, "lines"),
182
      point.padding = unit(0.01, "lines"),
183
      family="Helvetica"
184
    )+
185
    theme(legend.position = "none", legend.direction = "horizontal",
186
          axis.line = element_line(size=0.5, colour = "black"),
187
          panel.grid.major = element_line(colour = "#d3d3d3"),
188
          panel.grid.minor = element_blank(),
189
          panel.border = element_blank(), panel.background = element_blank(),
190
          plot.title = element_text(size = T.SIZE, face = "bold", family="Helvetica"),
191
          text=element_text()
192
    )+
193
    theme(axis.text.x = element_text(colour="grey20",size=T.SIZE,face="plain", family="Helvetica"),
194
          axis.text.y = element_text(colour="grey20",size=T.SIZE,face="plain", family="Helvetica"),  
195
          axis.title.x = element_text(colour="grey20",size=T.SIZE,face="plain", family="Helvetica"),
196
          axis.title.y = element_text(colour="grey20",size=T.SIZE,face="plain", family="Helvetica"))  
197
  return(p)
198
}
199
200
201
# Figure2A:
202
res_comb=d13=rowMeans(res[,c("DLBCL", "MCL", "FL", "CHL")])
203
fc_res_comb=rowMeans(fc_res[,c("DLBCL", "MCL", "FL", "CHL")])
204
205
pval_comb=apply(10^-res_pval[,c("DLBCL", "MCL", "FL", "CHL")], 1, combine.test, method = "z.transform")
206
207
data_comb=data.frame("gene"=names(res_comb), "Rho"=as.numeric(res_comb), "adj.pval"=pval_comb, "FCtoTNK"=as.numeric(fc_res_comb), "disease"="BCL", stringsAsFactors = F)
208
209
# significant
210
data_comb$significant=data_comb$adj.pval<0.01&abs(data_comb$FCtoTNK)>1.5&abs(data_comb$Rho)>0.2
211
212
# add category:
213
data_comb$category=""
214
data_comb$category[data_comb$FCtoTNK>0&data_comb$Rho>0&data_comb$significant]="CTL/NK gene"
215
data_comb$category[data_comb$FCtoTNK<0&data_comb$Rho>0&data_comb$significant]="Stromal/cancer gene (Rho > 0)"
216
data_comb$category[data_comb$FCtoTNK<0&data_comb$Rho<0&data_comb$significant]="Stromal/cancer gene (Rho < 0)"
217
218
# these were picked from above analysis, as they were linked to immunological regulation and myeloid infiltrate
219
monoc=c(c("CD14", "LYZ", "S100A8", "CD68", "CD163","MS4A4A", "MS4A6A","TLR8","LILRB2","IDO1","IFI27", "CXCL9", "CXCL10", "CXCL11", "CCL8", "CCL18", "CCL19", "TNFSF13B","CXCL13", "C1QA", "C1QB", "C1QC", "HBB"), c("IFNG", "CCL5","TNFSF10", "CD226", "CD160","KLRF1","KLRG1", "KLRC3", "EOMES", "GZMA", "PRF1", "CD8A"))
220
221
lymphomagenes=unique(res_all.filt$gene[res_all.filt$disease%in%c("DLBCL", "MCL", "FL", "CHL")])
222
223
NAME="Figure2A.pdf"
224
dat=fun.plot(dat = data_comb,
225
             main="B cell lymphomas",
226
             genelist=unique(monoc[monoc%in%lymphomagenes]),
227
             # genelist2=unique(lymphomagenes),
228
             aval=1.5, 
229
             bval=0.35,
230
             cval = 0.01,
231
             MAX = 60,
232
             SIZE=2.5,
233
             T.SIZE = 10,
234
             ylab="Average correlation with cytolytic score",
235
             xlab= paste0("Average expression fold change\n(CTL/NK cells vs. ", "BCL", ")"))
236
237
ggsave(filename = NAME, plot = dat, width = 85, height = 120, units = "mm", dpi=150)
238
239
240
# FigureS2:
241
NAME="FigureS2C.pdf"
242
p.all=lapply(seq(logicalvectors)[-4], function(i){
243
  fun.plot(dat = res_all[res_all$disease%in%names(logicalvectors)[i],],
244
           main=names(logicalvectors)[i],
245
           aval=2, 
246
           bval=0.2,
247
           cval = 0.01,
248
           MAX = 20,
249
           SIZE=1.75,
250
           T.SIZE = 8,
251
           ylab="correlation with cytolytic activity",
252
           xlab= paste0("expression fold change\n(CTL/NK cells vs. ", names(logicalvectors)[i], ")"))
253
})
254
ggsave(NAME, ggpubr::ggarrange(plotlist = p.all, ncol = 3, nrow = 3),  width = 152, height = 200, units = "mm", dpi=300)
255
256
# plot dotplot:
257
data.plot=data.frame(res_all$gene, res_all$disease, res_all$Rho, abs(res_all$FCtoTNK), "category"=res_all$category)
258
data.plot=data.plot[data.plot$res_all.gene%in%res_all.filt$gene,]
259
colnames(data.plot)=c("features", "id", "variable.1", "variable.2", "category")
260
data.plot$id=factor(data.plot$id,  levels=c("DLBCL","CHL", "FL", "MCL", "CLL", "CML", "AML", "MDS", "pre-B-ALL", "T-ALL"))
261
262
#************************************ Figure 2B: ***********************************
263
264
TNK.genes=rownames(pval.sorted)[rowSums(pval.sorted>2)==0&rownames(pval.sorted)%in%res_all.filt$gene[res_all.filt$category=="CTL/NK gene"]]
265
266
monoc=c(c("IFNG", "CCL5","TNFSF10", "CD226", "CD160","KLRF1","KLRG1", "KLRC3", "EOMES"), c("CD14", "LYZ", "S100A8", "CD68", "CD163","MS4A4A", "MS4A6A","TLR8","LILRB2","IDO1","IFI27", "CXCL9", "CXCL10", "CXCL11", "CCL8", "CCL18", "CCL19", "TNFSF13B","CXCL13", "C1QA", "C1QB", "C1QC", "HBB"))
267
268
d=data.plot[data.plot$features%in%monoc,]
269
d=d[order(match(d$features, monoc)),]
270
271
d$features=factor(as.character(d$features), levels = monoc)
272
273
# pdf("Hemap_microenvironment_Figure2_stroma.pdf", height = 4, width = 3.25)
274
# plot.DotPlot.df(data.plot = d, name.variable.1 = "Rho", name.variable.2 = "FC", cols = c("blue", "white", "red"), dot.scale = 3, col.min = -0.5, col.max = 0.75)
275
# dev.off()
276
# 
277
# pdf("Hemap_microenvironment_TNK_genes.pdf", height = 7, width = 3.25)
278
# plot.DotPlot.df(data.plot = data.plot[data.plot$features%in%TNK.genes,], name.variable.1 = "Rho", name.variable.2 = "FC", cols = c("blue", "white", "red"), dot.scale = 3, col.min = -0.5, col.max = 0.75)
279
# dev.off()
280
281
# plot dotplot:
282
data.plot=data.frame(res_all$gene, res_all$disease, res_all$Rho, res_all$mean.expression, "category"=res_all$category)
283
data.plot=data.plot[data.plot$res_all.gene%in%res_all.filt$gene,]
284
colnames(data.plot)=c("features", "id", "variable.1", "variable.2", "category")
285
data.plot$id=factor(data.plot$id,  levels=c("DLBCL","CHL", "FL", "MCL", "CLL", "CML", "AML", "MDS", "pre-B-ALL", "T-ALL"))
286
287
d=data.plot[data.plot$features%in%monoc,]
288
d=d[order(match(d$features, monoc)),]
289
d$features=factor(as.character(d$features), levels = monoc)
290
291
292
pdf("Figure2B.pdf", height = 4, width = 4.25)
293
plot.DotPlot.df(data.plot = d, name.variable.1 = "Rho", name.variable.2 = "Median expression", cols = c("blue", "white", "red"), dot.scale = 3, col.min = -0.5, col.max = 0.75)
294
dev.off()
295
296
297
#*********************** Plot genes in normal cells ************************
298
normals2=get.logical(annovector = list(annot$immunoNormals),filterv = !annot$immunoNormals=="")
299
normals2=normals2[!names(normals2)%in%c("TcellActivatedMononuclear", "LangerhansCell", "Gamma-delta-Tcell", "AlveolarMacrophage")]
300
301
normals=get.logical(annovector = list(annot$plotNormals),filterv = !annot$plotNormals=="")
302
303
normals=append(normals, get.logical(annovector = list(annot$immunoNormals),filterv = grepl("Macrophage", annot$immunoNormals)))
304
normals=normals[!names(normals)%in%c("PBMC", "Langerhans Cell", "Langerhans cell",  "Gamma-delta-Tcell", "Macrophage", "AlveolarMacrophage")]
305
306
normals=append(normals, normals2[names(normals2)%in%"CD4+Tcell"])
307
308
median_res=do.call(cbind, mclapply(seq(normals), function(i){
309
  logv=normals[[i]]
310
  NAME=names(normals)[i]
311
  
312
  FC=apply(data[,], 1, function(v){
313
    median(v[logv])
314
  })
315
  
316
}, mc.cores=10))
317
318
colnames(median_res)=names(normals)
319
320
subclass2=get.logical(annovector = list(annot$subclasses), filterv = annot$CELLS_SORTED==1&annot$Sample.type%in%c("Prolif", "Cancer"))
321
l=c("AML", "BCL", "CLL", "CML", "pre-B-ALL", "Prolif_Myeloproliferative_MDS", "T-ALL")
322
logicalvectors.sorted=subclass2[names(subclass2)%in%l]
323
names(logicalvectors.sorted)=gsub("^BCL_|Lymphoma_BCL_|Prolif_Myeloproliferative_", "", names(logicalvectors.sorted))
324
325
median_res_sorted=do.call(cbind, mclapply(seq(logicalvectors.sorted), function(i){
326
  logv=logicalvectors.sorted[[i]]
327
  NAME=names(logicalvectors.sorted)[i]
328
  
329
  FC=apply(data[,], 1, function(v){
330
    median(v[logv])
331
  })
332
  
333
}, mc.cores=10))
334
335
colnames(median_res_sorted)=names(logicalvectors.sorted)
336
337
# picked, plot
338
d=t(scale(t(median_res[match(monoc, rownames(median_res)),])))
339
d[d>2]=2
340
d[d<(-2)]=2
341
342
pdf("Figre2B_NormalCells.pdf", width = 4, height = 7)
343
Heatmap(d, cluster_rows = F)
344
Heatmap(median_res[match(monoc, rownames(median_res)),], cluster_rows = F)
345
Heatmap(median_res_sorted[match(monoc, rownames(median_res_sorted)),], cluster_rows = F)
346
dev.off()
347
348
d=t(scale(t(median_res[match(TNK.genes, rownames(median_res_sorted)),])))
349
d[d>2]=2
350
d[d<(-2)]=2
351
352
# pdf("Fig2_normal_cells_allTNK_genes.pdf", width = 4, height = 15)
353
# Heatmap(d, cluster_rows = F)
354
# Heatmap(median_res[match(TNK.genes, rownames(median_res)),], cluster_rows = F)
355
# Heatmap(median_res_sorted[match(TNK.genes, rownames(median_res_sorted)),], cluster_rows = F)
356
# dev.off()