a b/Fig7_Univariate_Coxph_survival.R
1
GIT_HOME="/research/users/ppolonen/git_home/ImmunogenomicLandscape-BloodCancers/"
2
source(file.path(GIT_HOME, "common_scripts/visualisation/plotting_functions.R"))
3
source(file.path(GIT_HOME, "common_scripts/statistics/functions_statistics.R"))
4
source(file.path(GIT_HOME, "common_scripts/statistics/statistics_wrappers.R"))
5
source(file.path(GIT_HOME, "common_scripts/featurematrix/compute.pairwise.R"))
6
source(file.path(GIT_HOME, "common_scripts/featurematrix/functions_generate_fm.R"))
7
8
library(RColorBrewer)
9
library(survival)
10
library(data.table)
11
library(ggplot2)
12
library(ComplexHeatmap)
13
library(survMisc)
14
library(survminer)
15
library(plyr)
16
17
setwd("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/Published_data_figures")
18
19
#******************************************** Hemap *********************************************
20
21
annot = get(load("Hemap_immunology_Annotations.Rdata"))
22
annot=annot[!is.na(annot$OS_Time),]
23
24
# survival time and status
25
TIME=annot$OS_Time
26
STATUS=as.numeric(annot$OS_Status)
27
TIME2=annot$PFS_Time
28
STATUS2=as.numeric(annot$PFS_Status)
29
30
#************************************** Process gene expression data ************************************
31
profile=data.matrix(get(load("mixtureM_profile.Rdata")))
32
profile[profile==-1] = 0
33
profile2=profile[,colnames(profile)%in%annot$GSM.identifier..sample.]
34
data=t(get(load("data9544_with_gene_symbols.RData")))
35
data=data[,colnames(data)%in%annot$GSM.identifier..sample.]
36
37
# take only high expressed into account, for CGA score
38
profile2[data<5]=0
39
40
#********************************************************************************************************
41
42
43
#********************************** Make necessary gene lists ***************************************
44
45
# other lists from each analysis
46
cga = read.delim("t.antigen_df.txt", stringsAsFactors=F, header=T)[,1]
47
co.stim=fread("costim_ligands_final.txt", data.table = F)
48
49
hlagenes=c("B2M", "HLA-A", "HLA-B", "HLA-C", "HLA-DMA", "HLA-DMB", "HLA-DPA1", "HLA-DPB1", "HLA-DRA", "HLA-DRB1", "CIITA")
50
immunoscore=c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")
51
52
# for AML:
53
MDS=get(load("MDS_genesets.Rdata"))
54
55
# significant per disease
56
microenv=get(load("Hemap_cytolytic_correlated_genes_TableS2_onlysignif.Rdata"))
57
58
#****************************************************************************************************
59
# data for survival
60
df=data.frame("time"=annot$OS_Time, "status"=annot$OS_Status, "HLAI"=annot$HLAIScore, "HLAII"=annot$HLAIIScore, "CytolyticScore"=annot$CytolyticScore, "Freq.CGA"=colSums(profile2[rownames(profile2)%in%cga,]), stringsAsFactors = F)
61
62
#********************************* just AML *********************************
63
filterv = annot$subclasses%in%"AML"&annot$CELLS_SORTED==0
64
logicalv=get.logical(annovector = list(annot$GSE.identifier..experiment.), filterv = filterv)
65
names(logicalv)=unique(paste(names(logicalv), annot$subclasses[filterv]))
66
67
filterv = annot$subclasses%in%"AML"&annot$CELLS_SORTED==0
68
logicalv2=get.logical(annovector = list(annot$subclasses), filterv = filterv)
69
names(logicalv2)="Hemap_AML"
70
logicalv=append(logicalv2, logicalv)
71
logicalv=logicalv[lapply(logicalv, sum)>2]
72
73
# HLA, cytolytic
74
DATA=data.frame("HLAI"=scale(annot$HLAIScore), "HLAII"=scale(annot$HLAIIScore), "CytolyticScore"=scale(annot$CytolyticScore))
75
aml_res=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
76
aml_res_multivar=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = F, pretty=F))
77
78
DATA_clin=data.frame("Age"=annot$AGE, "Gender"=annot$GENDER)
79
aml_res_clin=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_clin,TIME,STATUS, univariate = T, pretty=F))
80
aml_res_multivar=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_clin,TIME,STATUS, univariate = F, pretty=F))
81
82
DATA_hla=data.frame(t(data[hlagenes,]))
83
aml_res_HLA=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_hla,TIME,STATUS, univariate = T, pretty=F))
84
85
# costim
86
DATA=data.frame(scale(t(data[rownames(data)%in%c(co.stim[,1]),])))
87
aml_res_costim=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
88
89
# MDS:
90
DATA=data.frame(scale(t(data[rownames(data)%in%c(MDS$MDS_signature_all_filt),])))
91
aml_res_MDS=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
92
93
# microenvironment
94
ME=microenv[microenv$disease%in%"AML",]
95
DATA=data.frame(scale(t(data[rownames(data)%in%ME$gene,])))
96
aml_res_microenvironment=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
97
aml_res_microenvironment[aml_res_microenvironment$Adj.P<0.1,]
98
99
# subtype:
100
load("Hemap_AML_subtypes.Rdata")
101
coordinates.subtype=coordinates.subtype[!is.na(coordinates.subtype$subtype),]
102
103
samples=lapply(unique(coordinates.subtype$subtype), function(type)coordinates.subtype$ID[coordinates.subtype$subtype%in%type])
104
DATA=data.frame(do.call(cbind, lapply(samples, function(id)annot$GSM.identifier..sample.%in%id))*1)
105
colnames(DATA)=gsub("-", ".", unique(coordinates.subtype$subtype))
106
107
aml_res_subtype=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
108
109
# make table S6, adjusted p-value set here to correct for number of comparisons in total:
110
tableS7=rbind(aml_res, aml_res_subtype, aml_res_costim, aml_res_MDS, aml_res_microenvironment, aml_res_clin)
111
tableS7=tableS7[tableS7$Name%in%c("Hemap_AML"),]
112
tableS7$Adj.P=p.adjust(tableS7$P, method="BH")
113
114
tableS7[,2]=prettyNum(tableS7[,2])
115
tableS7[,3]=prettyNum(tableS7[,3])
116
tableS7[,4]=prettyNum(tableS7[,4])
117
tableS7[,5]=prettyNum(tableS7[,5])
118
tableS7[,6]=prettyNum(tableS7[,6])
119
tableS7[,8]=prettyNum(tableS7[,8])
120
121
# annotate these genes, needed later:
122
tableS7[,1]=gsub("\\.", "-", tableS7[,1])
123
124
genelist_signif=data.frame(tableS7[,1], type="Clinical", stringsAsFactors = F)
125
genelist_signif$type[genelist_signif[,1]%in%c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")]="ImmunoScores"
126
genelist_signif$type[genelist_signif[,1]%in%unique(coordinates.subtype$subtype)]="Subtype"
127
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho > 0)",1]]="Stromal/cancer gene (Rho > 0)"
128
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho < 0)",1]]="Stromal/cancer gene (Rho < 0)"
129
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"CTL/NK gene",1]]="CTL/NK gene"
130
genelist_signif$type[genelist_signif[,1]%in%aml_res_MDS[,1]]="MDS-signature gene"
131
genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Inhibitory", co.stim$`Immune checkpoint function`),1]]="Inhibitory ligand"
132
genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Stimulatory", co.stim$`Immune checkpoint function`),1]]="Stimulatory ligand"
133
tableS7$Type=genelist_signif$type
134
tableS7=tableS7[order(tableS7$Type),]
135
136
data.table::fwrite(tableS7[,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Hemap_AML.tsv", sep="\t")
137
data.table::fwrite(tableS7[tableS7$Adj.P<0.2,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Hemap_AML_signif.tsv", sep="\t")
138
139
# save cohort and survival
140
gexp=data.frame(scale(t(data)))
141
immunoscore=data.frame("HLAI"=scale(annot$HLAIScore), "HLAII"=scale(annot$HLAIIScore), "CytolyticScore"=scale(annot$CytolyticScore))
142
143
samples=lapply(unique(coordinates.subtype$subtype), function(type)coordinates.subtype$ID[coordinates.subtype$subtype%in%type])
144
subtypes=do.call(cbind, lapply(samples, function(id)annot$GSM.identifier..sample.%in%id))*1 
145
colnames(subtypes)=gsub("-", ".", unique(coordinates.subtype$subtype))
146
samp=data.frame(subtypes, "Age"=annot$AGE, "Gender"=annot$GENDER)
147
148
save(list = c("gexp","immunoscore", "TIME", "STATUS", "logicalv", "samp"), file="Hemap_AML_survival_data.Rdata")
149
150
#************************************* just MM *************************************
151
filterv = annot$subclasses%in%"Cancer_Myeloma"&!is.na(annot$MM_ISS)
152
logicalv=get.logical(annovector = list(annot$GSE.identifier..experiment.), filterv = filterv)
153
names(logicalv)=unique(paste(names(logicalv), annot$subclasses[filterv]))
154
155
# just MM
156
filterv = annot$subclasses%in%"Cancer_Myeloma"&!is.na(annot$MM_ISS)
157
logicalv2=get.logical(annovector = list(annot$subclasses), filterv = filterv)
158
names(logicalv2)="MM_all"
159
logicalv=append(logicalv2, logicalv)
160
names(logicalv)=c("Hemap_MM", "GSE19784_Hemap_MM", "GSE16716,GSE24080_Hemap_MM")
161
162
data.test=data.frame("time"=TIME[logicalv[[1]]], "status"=STATUS[logicalv[[1]]])
163
164
ggsurvplot(survfit(Surv(time, status) ~ 1, data = data.test), 
165
           xlab = "months", 
166
           ylab = "Overall survival probability")
167
168
169
DATA=data.frame("HLAI"=scale(annot$HLAIScore), "HLAII"=scale(annot$HLAIIScore), "ISS3"=(annot$MM_ISS==3)*1, "ISS1"=(annot$MM_ISS==1)*1, "Freq.CGA"=as.numeric(df$Freq.CGA),"Age"=annot$AGE, "Gender"=annot$GENDER, check.names = F)
170
171
mm_res=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
172
# mm_res_multivar=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA[,3:5],TIME,STATUS, univariate = F, pretty=F))
173
174
# costim
175
DATA=data.frame(scale(t(data[rownames(data)%in%c(co.stim[,1]),])))
176
mm_res_costim=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
177
178
DATA=data.frame(scale(t(data[rownames(data)%in%cga,])))
179
mm_res_antigen=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
180
181
# subtype:
182
load("Hemap_MM_subtypes.Rdata")
183
184
samples=lapply(unique(coordinates.subtype$subtype), function(type)coordinates.subtype$ID[coordinates.subtype$subtype%in%type])
185
DATA=data.frame(do.call(cbind, lapply(samples, function(id)annot$GSM.identifier..sample.%in%id))*1)
186
colnames(DATA)=gsub("-", "_", unique(coordinates.subtype$subtype))
187
188
mm_res_subtype=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
189
190
# make table S6, adjusted p-value set here to correct for number of comparisons in total:
191
tableS7=rbind(mm_res, mm_res_costim, mm_res_antigen, mm_res_subtype)
192
tableS7=tableS7[tableS7$Name%in%c("GSE16716,GSE24080_Hemap_MM"),]
193
tableS7$Adj.P=p.adjust(tableS7$P, method="BH")
194
195
196
tableS7[,2]=prettyNum(tableS7[,2])
197
tableS7[,3]=prettyNum(tableS7[,3])
198
tableS7[,4]=prettyNum(tableS7[,4])
199
tableS7[,5]=prettyNum(tableS7[,5])
200
tableS7[,6]=prettyNum(tableS7[,6])
201
tableS7[,8]=prettyNum(tableS7[,8])
202
203
# annotate these genes, needed later:
204
genelist_signif=data.frame(tableS7[,1], type="Clinical", stringsAsFactors = F)
205
genelist_signif$type[genelist_signif[,1]%in%c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")]="ImmunoScores"
206
genelist_signif$type[genelist_signif[,1]%in%gsub("-", "_", unique(coordinates.subtype$subtype))]="Subtype"
207
genelist_signif$type[genelist_signif[,1]%in%cga]="CGA"
208
genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Inhibitory", co.stim$`Immune checkpoint function`),1]]="Inhibitory ligand"
209
genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Stimulatory", co.stim$`Immune checkpoint function`),1]]="Stimulatory ligand"
210
tableS7$Type=genelist_signif$type
211
tableS7=tableS7[order(tableS7$Type),]
212
tableS7[,1]=gsub("\\.", "-", tableS7[,1])
213
214
data.table::fwrite(tableS7[,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Hemap_MM_GSE24080.tsv", sep="\t")
215
data.table::fwrite(tableS7[tableS7$Adj.P<0.2,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Hemap_MM_GSE24080_signif.tsv", sep="\t")
216
217
218
# make table S6, adjusted p-value set here to correct for number of comparisons in total:
219
tableS7=rbind(mm_res, mm_res_costim, mm_res_antigen)
220
tableS7=tableS7[tableS7$Name%in%c("GSE19784_Hemap_MM"),]
221
tableS7$Adj.P=p.adjust(tableS7$P, method="BH")
222
223
tableS7[,2]=prettyNum(tableS7[,2])
224
tableS7[,3]=prettyNum(tableS7[,3])
225
tableS7[,4]=prettyNum(tableS7[,4])
226
tableS7[,5]=prettyNum(tableS7[,5])
227
tableS7[,6]=prettyNum(tableS7[,6])
228
tableS7[,8]=prettyNum(tableS7[,8])
229
230
# annotate these genes, needed later:
231
genelist_signif=data.frame(tableS7[,1], type="Clinical", stringsAsFactors = F)
232
genelist_signif$type[genelist_signif[,1]%in%c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")]="ImmunoScores"
233
genelist_signif$type[genelist_signif[,1]%in%gsub("-", "_", unique(coordinates.subtype$subtype))]="Subtype"
234
genelist_signif$type[genelist_signif[,1]%in%cga]="CGA"
235
genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Inhibitory", co.stim$`Immune checkpoint function`),1]]="Inhibitory ligand"
236
genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Stimulatory", co.stim$`Immune checkpoint function`),1]]="Stimulatory ligand"
237
tableS7$Type=genelist_signif$type
238
tableS7=tableS7[order(tableS7$Type),]
239
tableS7[,1]=gsub("\\.", "-", tableS7[,1])
240
241
data.table::fwrite(tableS7[,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Hemap_MM_GSE19784.tsv", sep="\t")
242
data.table::fwrite(tableS7[tableS7$Adj.P<0.2,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Hemap_MM_GSE19784_signif.tsv", sep="\t")
243
244
# save cohort and survival
245
gexp=data.frame(scale(t(data)))
246
immunoscore=data.frame("HLAI"=scale(annot$HLAIScore), "HLAII"=scale(annot$HLAIIScore), "Freq.CGA"=as.numeric(df$Freq.CGA), check.names = F)
247
248
samples=lapply(unique(coordinates.subtype$subtype), function(type)coordinates.subtype$ID[coordinates.subtype$subtype%in%type])
249
subtype=do.call(cbind, lapply(samples, function(id)annot$GSM.identifier..sample.%in%id))*1
250
colnames(subtype)=c(gsub("-", ".", unique(coordinates.subtype$subtype)), colnames(subtype))
251
samp=data.frame(subtype, "Age"=annot$AGE, "Gender"=annot$GENDER, "ISS3"=(annot$MM_ISS==3)*1, "ISS1"=(annot$MM_ISS==1)*1)
252
253
save(list = c("gexp","immunoscore", "TIME", "STATUS", "logicalv", "samp"), file="Hemap_MM_survival_data.Rdata")
254
255
#***************************** just DLBCL ************************************
256
# remove genes not in chapyu dataset:
257
gexp=get(load("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/data/GSE98588/GSE98588_symbol_rma_normalized.Rdata"))
258
259
ME=microenv[microenv$disease%in%"DLBCL"&microenv$category=="Stromal/cancer gene (Rho > 0)"&microenv$Rho>0.4,]
260
microenv_dlbcl=ME[,1]
261
microenv_dlbcl=microenv_dlbcl[microenv_dlbcl%in%rownames(gexp)]
262
263
filterv = annot$subclasses%in%"BCL_DLBCL"&!is.na(annot$dlbcl_ipi)
264
logicalv=get.logical(annovector = list(annot$GSE.identifier..experiment.), filterv = filterv)
265
names(logicalv)=unique(paste(names(logicalv), annot$subclasses[filterv]))
266
267
filterv = annot$subclasses%in%"BCL_DLBCL"
268
logicalv2=get.logical(annovector = list(annot$subclasses), filterv = filterv)
269
names(logicalv2)="BCL_DLBCL_all"
270
271
logicalv=append(logicalv2, logicalv)
272
logicalv=logicalv[lapply(logicalv, sum)>2]
273
274
RCHOP=list(annot$Chemotherapy_RCHOP==1&!is.na(STATUS)&!TIME==0)
275
CHOP=list(annot$Chemotherapy_CHOP==1&!is.na(STATUS)&!TIME==0)
276
names(RCHOP)="Hemap_DLBCL_RCHOP"
277
names(CHOP)="Hemap_DLBCL_CHOP"
278
279
logicalv <- append(logicalv, append(RCHOP, CHOP))
280
281
# HLA, cytolytic
282
DATA=data.frame("HLAI"=scale(annot$HLAIScore), "HLAII"=scale(annot$HLAIIScore), "CytolyticScore"=scale(annot$CytolyticScore), "IPI_0to1"=(annot$dlbcl_ipi%in%c(0,1))*1, "IPI_4to5"=(annot$dlbcl_ipi%in%c(4,5))*1, "Freq.CGA"=as.numeric(df$Freq.CGA),  "ABC"=(grepl("ABC", annot$tbLY))*1, "GCB"=(grepl("GCB", annot$tbLY))*1,check.names = F)
283
284
dlbcl_res=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
285
# dlbcl_res_multivar=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA[,4:6],TIME,STATUS, univariate = F, pretty=F))
286
287
# costim
288
DATA=data.frame(scale(t(data[rownames(data)%in%c(co.stim[,1]),])))
289
dlbcl_res_costim=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
290
291
# microenv
292
DATA=data.frame(scale(t(data[rownames(data)%in%c(microenv_dlbcl),])))
293
dlbcl_res_microenv=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
294
295
DATA=data.frame(scale(t(data[rownames(data)%in%cga,])))
296
dlbcl_res_antigen=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
297
298
# make table S6, adjusted p-value set here to correct for number of comparisons in total:
299
tableS7=rbind(dlbcl_res, dlbcl_res_costim, dlbcl_res_microenv, dlbcl_res_antigen)
300
tableS7=tableS7[tableS7$Name%in%c("Hemap_DLBCL_RCHOP"),]
301
tableS7$Adj.P=p.adjust(tableS7$P, method="BH")
302
303
tableS7[,2]=prettyNum(tableS7[,2])
304
tableS7[,3]=prettyNum(tableS7[,3])
305
tableS7[,4]=prettyNum(tableS7[,4])
306
tableS7[,5]=prettyNum(tableS7[,5])
307
tableS7[,6]=prettyNum(tableS7[,6])
308
tableS7[,8]=prettyNum(tableS7[,8])
309
310
# annotate these genes, needed later:
311
genelist_signif=data.frame(tableS7[,1], type="Clinical", stringsAsFactors = F)
312
genelist_signif$type[genelist_signif[,1]%in%c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")]="ImmunoScores"
313
genelist_signif$type[genelist_signif[,1]%in%cga]="CGA"
314
genelist_signif$type[tableS7$Feature%in%c("ABC", "GCB")]="Subtype"
315
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho > 0)",1]]="Stromal/cancer gene (Rho > 0)"
316
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho < 0)",1]]="Stromal/cancer gene (Rho < 0)"
317
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"CTL/NK gene",1]]="CTL/NK gene"
318
genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Inhibitory", co.stim$`Immune checkpoint function`),1]]="Inhibitory ligand"
319
genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Stimulatory", co.stim$`Immune checkpoint function`),1]]="Stimulatory ligand"
320
321
tableS7$Type=genelist_signif$type
322
tableS7=tableS7[order(tableS7$Type),]
323
tableS7[,1]=gsub("\\.", "-", tableS7[,1])
324
325
data.table::fwrite(tableS7[,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Hemap_DLBCL_RCHOP.tsv", sep="\t")
326
data.table::fwrite(tableS7[tableS7$Adj.P<0.2,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Hemap_DLBCL_RCHOP_signif.tsv", sep="\t")
327
328
329
# make table S6, adjusted p-value set here to correct for number of comparisons in total:
330
tableS7=rbind(dlbcl_res, dlbcl_res_costim, dlbcl_res_microenv, dlbcl_res_antigen) 
331
tableS7=tableS7[tableS7$Name%in%c("Hemap_DLBCL_CHOP"),]
332
tableS7$Adj.P=p.adjust(tableS7$P, method="BH")
333
334
tableS7[,2]=prettyNum(tableS7[,2])
335
tableS7[,3]=prettyNum(tableS7[,3])
336
tableS7[,4]=prettyNum(tableS7[,4])
337
tableS7[,5]=prettyNum(tableS7[,5])
338
tableS7[,6]=prettyNum(tableS7[,6])
339
tableS7[,8]=prettyNum(tableS7[,8])
340
341
# annotate these genes, needed later:
342
genelist_signif=data.frame(tableS7[,1], type="Clinical", stringsAsFactors = F)
343
genelist_signif$type[genelist_signif[,1]%in%c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")]="ImmunoScores"
344
genelist_signif$type[genelist_signif[,1]%in%cga]="CGA"
345
genelist_signif$type[tableS7$Feature%in%c("ABC", "GCB")]="Subtype"
346
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho > 0)",1]]="Stromal/cancer gene (Rho > 0)"
347
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho < 0)",1]]="Stromal/cancer gene (Rho < 0)"
348
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"CTL/NK gene",1]]="CTL/NK gene"
349
genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Inhibitory", co.stim$`Immune checkpoint function`),1]]="Inhibitory ligand"
350
genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Stimulatory", co.stim$`Immune checkpoint function`),1]]="Stimulatory ligand"
351
352
tableS7$Type=genelist_signif$type
353
tableS7=tableS7[order(tableS7$Type),]
354
tableS7[,1]=gsub("\\.", "-", tableS7[,1])
355
356
data.table::fwrite(tableS7[,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Hemap_DLBCL_CHOP.tsv", sep="\t")
357
data.table::fwrite(tableS7[tableS7$Adj.P<0.2,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Hemap_DLBCL_CHOP_signif.tsv", sep="\t")
358
359
# save cohort and survival
360
gexp=data.frame(scale(t(data)))
361
immunoscore=data.frame("HLAI"=scale(annot$HLAIScore), "HLAII"=scale(annot$HLAIIScore), "Freq.CGA"=as.numeric(df$Freq.CGA), check.names = F)
362
363
samp=data.frame("Age"=annot$AGE, "Gender"=annot$GENDER, "IPI_0to1"=(annot$dlbcl_ipi%in%c(0,1))*1, "IPI_4to5"=(annot$dlbcl_ipi%in%c(4,5))*1,"ABC"=(grepl("ABC", annot$tbLY))*1, "GCB"=(grepl("GCB", annot$tbLY))*1)
364
365
save(list = c("gexp","immunoscore","samp", "TIME", "STATUS", "logicalv"), file="Hemap_DLBCL_survival_data.Rdata")
366
367
368
#******************************************** TCGA AML *****************************************************
369
fm_org=get(load("TCGA_AML_FM_DUFVA.Rdata"))
370
fm=fm_org[,!is.na(fm_org["N:SAMP:CytolyticScore",])]
371
372
risks=c("N:CLIN:Age:::::",
373
        "C:CLIN:acute_myeloid_leukemia_calgb_cytogenetics_risk_category:::::" ,
374
        "C:CLIN:FISH_test_component:::::",
375
        "B:GNAB:NPM1:chr5:170814708:170837888:+:y_n_somatic",
376
        "B:GNAB:FLT3:chr13:28577411:28674729:-:y_n_somatic",
377
        "B:GNAB:CEBPA:chr19:33790840:33793430:-:y_n_somatic",
378
        "B:GNAB:TP53:chr17:7565097:7590863:-:y_n_somatic")
379
380
df=t(fm[rownames(fm)%in%risks,])
381
colnames(df)=do.call(rbind, strsplit(colnames(df), ":"))[,3]
382
383
data=data.matrix(fm[grepl("GEXP", rownames(fm)),])
384
rownames(data)=make.unique(do.call(rbind, strsplit(rownames(data), ":"))[,3])
385
386
387
OS=as.numeric(fm["N:CLIN:OS.months..3.31.12:::::",])
388
TIME=OS
389
STATUS=as.numeric(fm["C:CLIN:vital_status_TCGA_paper:::::",]=="DECEASED")
390
PFS=as.numeric(fm["N:CLIN:EFS.months....4.30.13:::::",])
391
392
DATA=data.frame("HLAI"=scale(as.numeric(fm["N:SAMP:HLAIScore",])), "HLAII"=scale(as.numeric(fm["N:SAMP:HLAIIScore",])), "CytolyticScore"=scale(as.numeric(fm["N:SAMP:CytolyticScore",])))
393
394
logicalv=list("TCGA_AML"=rep(T, dim(DATA)[1]))
395
396
TCGA_AML_res=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
397
# TCGA_AML_multivar=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = F, pretty=F))
398
399
# clinical
400
DATA_clin=data.frame("Age"=scale(as.numeric(fm["N:CLIN:Age:::::",])), "Blast.percentage"=scale(as.numeric(fm["N:CLIN:X.BM.Blast:::::",])), "Gender"=as.character(fm["C:CLIN:Sex:::::",]))
401
TCGA_AML_clin=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_clin,TIME,STATUS, univariate = T, pretty=F))
402
403
# costim
404
DATA=data.frame(scale(t(data[rownames(data)%in%c(co.stim[,1]),])))
405
TCGA_AML_costim=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
406
407
DATA_hla=data.frame(scale(t(data[rownames(data)%in%hlagenes,])))
408
TCGA_AML_HLA=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_hla,TIME,STATUS, univariate = T, pretty=F))
409
410
# MDS
411
DATA=data.frame(scale(t(data[rownames(data)%in%c(MDS$MDS_signature_all_filt),])))
412
TCGA_AML_MDS=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
413
414
# mikroenvironment
415
ME=microenv[microenv$disease%in%"AML",]
416
DATA=data.frame(scale(t(data[rownames(data)%in%ME$gene,])))
417
TCGA_res_microenvironment=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
418
TCGA_res_microenvironment[TCGA_res_microenvironment$Adj.P<0.1,]
419
420
# subtype:
421
load("TCGA_AML_subtypes.Rdata")
422
423
DATA_subtypes=data.frame(do.call(cbind, get.logical(list(coordinates.subtype$subtype)))*1)
424
425
TCGA_aml_res_subtype=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_subtypes,TIME,STATUS, univariate = T, pretty=F))
426
427
# make table S6, adjusted p-value set here to correct for number of comparisons in total:
428
tableS7=rbind(TCGA_AML_res, TCGA_AML_costim, TCGA_AML_MDS, TCGA_res_microenvironment, TCGA_aml_res_subtype, TCGA_AML_clin)
429
tableS7$Adj.P=p.adjust(tableS7$P, method="BH")
430
431
tableS7[,2]=prettyNum(tableS7[,2])
432
tableS7[,3]=prettyNum(tableS7[,3])
433
tableS7[,4]=prettyNum(tableS7[,4])
434
tableS7[,5]=prettyNum(tableS7[,5])
435
tableS7[,6]=prettyNum(tableS7[,6])
436
tableS7[,8]=prettyNum(tableS7[,8])
437
438
# annotate these genes, needed later:
439
tableS7[,1]=gsub("\\.", "-", tableS7[,1])
440
441
genelist_signif=data.frame(tableS7[,1], type="Clinical", stringsAsFactors = F)
442
genelist_signif$type[genelist_signif[,1]%in%c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")]="ImmunoScores"
443
genelist_signif$type[genelist_signif[,1]%in%unique(coordinates.subtype$subtype)]="Subtype"
444
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho > 0)",1]]="Stromal/cancer gene (Rho > 0)"
445
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho < 0)",1]]="Stromal/cancer gene (Rho < 0)"
446
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"CTL/NK gene",1]]="CTL/NK gene"
447
genelist_signif$type[genelist_signif[,1]%in%aml_res_MDS[,1]]="MDS-signature gene"
448
genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Inhibitory", co.stim$`Immune checkpoint function`),1]]="Inhibitory ligand"
449
genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Stimulatory", co.stim$`Immune checkpoint function`),1]]="Stimulatory ligand"
450
451
tableS7$Type=genelist_signif$type
452
tableS7=tableS7[order(tableS7$Type),]
453
454
data.table::fwrite(tableS7[,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_TCGA_AML.tsv", sep="\t")
455
data.table::fwrite(tableS7[tableS7$Adj.P<0.2,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_TCGA_AML_signif.tsv", sep="\t")
456
457
# save cohort and survival
458
gexp=data.frame(scale(t(data)))
459
immunoscore=data.frame("HLAI"=scale(as.numeric(fm["N:SAMP:HLAIScore",])), "HLAII"=scale(as.numeric(fm["N:SAMP:HLAIIScore",])), "CytolyticScore"=scale(as.numeric(fm["N:SAMP:CytolyticScore",])))
460
461
samp=cbind(DATA_clin, DATA_subtypes)
462
463
save(list = c("gexp","immunoscore", "samp", "TIME", "STATUS", "logicalv"), file="TCGA_AML_survival_data.Rdata")
464
465
#********************************************************* Compass MM *********************************************************
466
fm=get(load("MM_COMPASS_FM.Rdata"))
467
annot=get(load("MM_COMPASS_ANNOT.Rdata"))
468
469
annot=annot[match(colnames(fm), rownames(annot)),]
470
471
data=fm[grepl("N:GEXP:", rownames(fm)),]
472
rownames(data)=gsub("N:GEXP:", "", rownames(data))
473
474
data.mut=fm[grepl("B:GNAB:", rownames(fm)),]
475
rownames(data.mut)=gsub("B:GNAB:", "", rownames(data.mut))
476
data.mut.filt=data.mut[!rowSums(data.mut, na.rm = T)<10,]
477
478
TIME=as.numeric(fm["N:CLIN:OS",])
479
STATUS=as.numeric(fm["B:CLIN:STATUS",])
480
481
STATUS[TIME>1825&!is.na(STATUS)&STATUS==1]=0 # change to 4year survival, 5 year sharp drop
482
TIME[TIME>1825]=1825
483
484
# compute antigen scores
485
t.df = read.delim("t.antigen_df.txt", stringsAsFactors=F, header=T)
486
t.df=t.df[order(t.df[,3]),]
487
genelist=t.df[,1]
488
489
data.test=data.frame("time"=TIME[logicalv[[1]]]*0.0328767, "status"=STATUS[logicalv[[1]]])
490
491
ggsurvplot(survfit(Surv(time, status) ~ 1, data = data.test), 
492
           xlab = "months", 
493
           ylab = "Overall survival probability")
494
495
expressed_testis_num=as.numeric(fm["N:SAMP:nCGA",])
496
497
l.regulon.gene=regulon.feats(fm, co.stim[,1])
498
499
hlagenes=c("B2M", "HLA-A", "HLA-B", "HLA-C", "HLA-DMA", "HLA-DMB", "HLA-DPA1", "HLA-DPB1", "HLA-DRA", "HLA-DRB1", "CIITA")
500
501
DATAcostim=scale(data.frame(t(gexp[rownames(gexp)%in%co.stim[,1],])))
502
DATAhla=scale(data.frame(t(gexp[rownames(gexp)%in%hlagenes,])))
503
DATAcga=scale(data.frame(t(gexp[rownames(gexp)%in%t.df[,1],])))
504
505
DATA=data.frame("HLAI"=scale(as.numeric(fm["N:SAMP:HLAIScore",])), "HLAII"=scale(as.numeric(fm["N:SAMP:HLAIIScore",])), "ISS1"=as.numeric(fm["B:CLIN:R_ISS_1",]),"ISS2"=as.numeric(fm["B:CLIN:R_ISS_2",]),"ISS3"=as.numeric(fm["B:CLIN:R_ISS_3",]), "Freq.CGA"=as.numeric(expressed_testis_num), stringsAsFactors = F)
506
507
logicalv=list(!is.na(expressed_testis_num))
508
509
names(logicalv)="CoMMPass"
510
511
# plot overall survival
512
r=lapply(seq(logicalv), function(i){
513
  data.test=data.frame("time"=TIME[logicalv[[i]]], "status"=STATUS[logicalv[[i]]])
514
  
515
  ggsurvplot(survfit(Surv(time, status) ~ 1, data = data.test), 
516
             xlab = "months", 
517
             ylab = "Overall survival probability",title=(names(logicalv)[i])
518
  )
519
})
520
521
names(r)=names(logicalv)
522
523
pdf("CoMMpass_MM_cohorts.pdf", width =5, height = ceiling(length(r)/2)*2.75)
524
plots.together=arrange_ggsurvplots(r, print = TRUE, ncol = 2, nrow = ceiling(length(r)/2))
525
dev.off()
526
527
commpass_res=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
528
commpass_res_multivar=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = F, pretty=F))
529
530
# costim, no mut
531
DATA=data.frame(scale(t(data[rownames(data)%in%c(co.stim[,1]),])))
532
commpass_costim=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
533
534
# mutations
535
mut=t(data.mut.filt[rownames(data.mut.filt)%in%c(t.df[,1]),])
536
colnames(mut)=paste0("MUT:", colnames(mut))
537
DATA=data.frame(scale(t(data[rownames(data)%in%c(t.df[,1]),])), mut)
538
539
commpass_antig=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
540
541
# subtype:
542
load("CoMMpass_MM_subtypes.Rdata")
543
coordinates.subtype=coordinates.subtype[match(colnames(fm), coordinates.subtype$ID),]
544
545
coordinates.subtype$subtype[coordinates.subtype$cluster=="CGA_Prolif"&!is.na(coordinates.subtype$cluster)]="CGA_Prolif"
546
DATA_subtypes=data.frame(do.call(cbind, get.logical(list(coordinates.subtype$subtype)))*1)
547
commpass_subtype=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_subtypes,TIME,STATUS, univariate = T, pretty=F))
548
549
# make table S6, adjusted p-value set here to correct for number of comparisons in total:
550
tableS7=rbind(commpass_res, commpass_costim, commpass_antig, commpass_subtype)
551
tableS7$Adj.P=p.adjust(tableS7$P, method="BH")
552
553
# annotate these genes, needed later:
554
tableS7[,2]=prettyNum(tableS7[,2])
555
tableS7[,3]=prettyNum(tableS7[,3])
556
tableS7[,4]=prettyNum(tableS7[,4])
557
tableS7[,5]=prettyNum(tableS7[,5])
558
tableS7[,6]=prettyNum(tableS7[,6])
559
tableS7[,8]=prettyNum(tableS7[,8])
560
561
# annotate these genes, needed later:
562
genelist_signif=data.frame(tableS7[,1], type="Clinical", stringsAsFactors = F)
563
genelist_signif$type[genelist_signif[,1]%in%c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")]="ImmunoScores"
564
genelist_signif$type[genelist_signif[,1]%in%cga]="CGA"
565
genelist_signif$type[genelist_signif[,1]%in%commpass_subtype$Feature]="Subtype"
566
genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Inhibitory", co.stim$`Immune checkpoint function`),1]]="Inhibitory ligand"
567
genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Stimulatory", co.stim$`Immune checkpoint function`),1]]="Stimulatory ligand"
568
569
tableS7$Type=genelist_signif$type
570
tableS7=tableS7[order(tableS7$Type),]
571
tableS7[,1]=gsub("\\.", "-", tableS7[,1])
572
573
data.table::fwrite(tableS7[,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_CoMMpass.tsv", sep="\t")
574
data.table::fwrite(tableS7[tableS7$Adj.P<0.2,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_CoMMpass_signif.tsv", sep="\t")
575
576
# save cohort and survival
577
gexp=data.frame(scale(t(data)))
578
immunoscore=data.frame("HLAI"=scale(as.numeric(fm["N:SAMP:HLAIScore",])), "HLAII"=scale(as.numeric(fm["N:SAMP:HLAIIScore",])), "Freq.CGA"=as.numeric(expressed_testis_num), stringsAsFactors = F)
579
580
samp=cbind(data.frame("ISS1"=as.numeric(fm["B:CLIN:R_ISS_1",]),"ISS2"=as.numeric(fm["B:CLIN:R_ISS_2",]),"ISS3"=as.numeric(fm["B:CLIN:R_ISS_3",]), stringsAsFactors = F), DATA_subtypes)
581
582
save(list = c("gexp","immunoscore", "samp", "TIME", "STATUS", "logicalv"), file="CoMMpass_survival_data.Rdata")
583
584
#********************************************************* Chapyu DLBCL *********************************************************
585
586
fm=get(load("GSE98588_fm.Rdata"))
587
annot=get(load("GSE98588_annot.Rdata"))
588
589
TIME=as.numeric(fm["N:CLIN:OS",])
590
PFS=as.numeric(fm["N:CLIN:PFS",])
591
STATUS=as.numeric(fm["B:CLIN:OS_STAT",])
592
STATUS2=as.numeric(fm["B:CLIN:PFS_STAT",])
593
594
# compute antigen scores
595
genelist=t.df[,1]
596
597
data=get(load("GSE98588_symbol_rma_normalized.Rdata"))
598
colnames(data)=gsub("_DLBCL", "", colnames(data))
599
gexp=data
600
601
load("GSE98588_DLBCL_mixtureM_profile.Rdata")
602
profile[profile==-1] = 0
603
profile[data.matrix(data)<5]=0
604
605
expressed_testis_num=as.numeric(colSums(profile[rownames(profile)%in%genelist,]))
606
607
cnv_annot=fread("41591_2018_16_MOESM8_ESM_CNV_ANNOT.txt", data.table=F)
608
609
l.regulon.gene=regulon.feats(fm, c(co.stim[,1], hlagenes), cnv_annot)
610
611
ME=microenv[microenv$disease%in%"DLBCL"&microenv$category=="Stromal/cancer gene (Rho > 0)"&microenv$Rho>0.4,]
612
613
# all costim-cytolytic-CGA correlated mutations:
614
costim_feats=data.matrix(fm[rownames(fm)%in%unlist(l.regulon.gene),])
615
rownames(costim_feats)=sapply(rownames(costim_feats), function(n){
616
  if(!grepl("CNVR", n))return(n)
617
  a=names(l.regulon.gene)[sapply(l.regulon.gene, function(a)any(a%in%n))]
618
  if(length(a))paste0(n,"@", paste(a, collapse=","))
619
})
620
621
DATAmut=data.frame(t(costim_feats[grepl("GNAB|CNVR", rownames(costim_feats)),]))
622
623
DATA=data.frame("HLAI"=scale(as.numeric(fm["N:SAMP:HLAIScore",])), "HLAII"=scale(as.numeric(fm["N:SAMP:HLAIIScore",])), "CytolyticScore"=scale(as.numeric(fm["N:SAMP:CytolyticScore",])), "Freq.CGA"=as.numeric(expressed_testis_num), stringsAsFactors = F)
624
625
logicalv=list(rep(T, dim(DATA)[1]))
626
names(logicalv)="GSE98588_DLBCL"
627
628
GSE98588_DLBCL_res=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
629
# GSE98588_DLBCL_multivar=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA[,4:6],TIME,STATUS, univariate = F, pretty=F))
630
631
# Clinical
632
DATA_clin=data.frame("IPI_0to1"=annot$IPI%in%c(0,1)*1,"IPI_4to5"=annot$IPI%in%c(4,5)*1, "ABC"=(annot$COO_byGEP=="ABC")*1 ,"GCB"=(annot$COO_byGEP=="GCB")*1,  stringsAsFactors = F)
633
GSE98588_DLBCL_clin=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_clin,TIME,STATUS, univariate = T, pretty=F))
634
635
# immuno-editing:
636
GSE98588_DLBCL_immunoediting=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATAmut,TIME,STATUS, univariate = T, pretty=F))
637
GSE98588_DLBCL_immunoediting$Feature=gsub("B.GNAB.", "MUT:", GSE98588_DLBCL_immunoediting$Feature)
638
GSE98588_DLBCL_immunoediting$Feature=gsub("HLA.", "HLA-", GSE98588_DLBCL_immunoediting$Feature)
639
GSE98588_DLBCL_immunoediting$Feature=gsub("B.CNVR.", "", GSE98588_DLBCL_immunoediting$Feature)
640
641
# costim
642
DATA=data.frame(scale(t(data[rownames(data)%in%c(co.stim[,1]),])), DATAmut, "costim.mut"=(rowSums(DATAmut)))
643
GSE98588_DLBCL_costim=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
644
645
# monocyte
646
DATA=data.frame(scale(t(data[rownames(data)%in%c(ME[,1]),])))
647
GSE98588_DLBCL_microenvironment=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
648
649
DATA=data.frame(scale(t(data[rownames(data)%in%c(t.df[,1]),])))
650
GSE98588_DLBCL_antigen=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
651
652
# make table S6, adjusted p-value set here to correct for number of comparisons in total:
653
tableS7=rbind(GSE98588_DLBCL_res,GSE98588_DLBCL_clin, GSE98588_DLBCL_costim, GSE98588_DLBCL_microenvironment, GSE98588_DLBCL_antigen, GSE98588_DLBCL_immunoediting) 
654
tableS7$Adj.P=p.adjust(tableS7$P, method="BH")
655
656
657
tableS7[,2]=prettyNum(tableS7[,2])
658
tableS7[,3]=prettyNum(tableS7[,3])
659
tableS7[,4]=prettyNum(tableS7[,4])
660
tableS7[,5]=prettyNum(tableS7[,5])
661
tableS7[,6]=prettyNum(tableS7[,6])
662
tableS7[,8]=prettyNum(tableS7[,8])
663
664
# annotate these genes, needed later:
665
genelist_signif=data.frame(tableS7[,1], type="Clinical", stringsAsFactors = F)
666
genelist_signif$type[genelist_signif[,1]%in%c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")]="ImmunoScores"
667
genelist_signif$type[genelist_signif[,1]%in%colnames(DATAmut)]="Immune Editing mutation"
668
genelist_signif$type[genelist_signif[,1]%in%cga]="CGA"
669
genelist_signif$type[tableS7$Feature%in%c("ABC", "GCB")]="Subtype"
670
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho > 0)",1]]="Stromal/cancer gene (Rho > 0)"
671
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho < 0)",1]]="Stromal/cancer gene (Rho < 0)"
672
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"CTL/NK gene",1]]="CTL/NK gene"
673
genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Inhibitory", co.stim$`Immune checkpoint function`),1]]="Inhibitory ligand"
674
genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Stimulatory", co.stim$`Immune checkpoint function`),1]]="Stimulatory ligand"
675
676
tableS7$Type=genelist_signif$type
677
tableS7=tableS7[order(tableS7$Type),]
678
tableS7[,1]=gsub("\\.", "-", tableS7[,1])
679
680
data.table::fwrite(tableS7[,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_GSE98588_DLBCL.tsv", sep="\t")
681
data.table::fwrite(tableS7[tableS7$Adj.P<0.2,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_GSE98588_DLBCL_signif.tsv", sep="\t")
682
683
# save cohort and survival
684
gexp=data.frame(scale(t(data)))
685
immunoscore=data.frame("HLAI"=scale(as.numeric(fm["N:SAMP:HLAIScore",])), "HLAII"=scale(as.numeric(fm["N:SAMP:HLAIIScore",])), "CytolyticScore"=scale(as.numeric(fm["N:SAMP:CytolyticScore",])), "Freq.CGA"=as.numeric(expressed_testis_num), stringsAsFactors = F)
686
687
samp=DATA_clin
688
689
save(list = c("gexp","immunoscore", "samp", "TIME", "STATUS", "logicalv"), file="GSE98588_DLBCL_survival_data.Rdata")
690
691
#********************************************************* Reddy DLBCL *********************************************************
692
fm=get(load("REDDY_DLBCL_fm.Rdata"))
693
annot=get(load("REDDY_DLBCL_annot.Rdata"))
694
695
data=data.matrix(fm[grepl("GEXP", rownames(fm)),])
696
rownames(data)=gsub("N:GEXP:", "", rownames(data))
697
698
TIME=as.numeric(annot$Overall.Survival.years)
699
STATUS=(as.numeric(annot$Censored)==0)*1 # 0 indicates no censoring, meaning that the death was observed, whereas a 1 indicates that the patient was alive
700
701
ME=microenv[microenv$disease%in%"DLBCL"&microenv$category=="Stromal/cancer gene (Rho > 0)"&microenv$Rho>0.4,]
702
703
l.regulon.gene=regulon.feats(fm, c(co.stim[,1], hlagenes))
704
705
# all costim-cytolytic-CGA correlated mutations:
706
costim_feats=data.matrix(fm[rownames(fm)%in%unlist(l.regulon.gene),])
707
rownames(costim_feats)=sapply(rownames(costim_feats), function(n){
708
  if(!grepl("CNVR", n))return(n)
709
  a=names(l.regulon.gene)[sapply(l.regulon.gene, function(a)any(a%in%n))]
710
  # if(length(a))paste0(n,"@", paste(a, collapse=","))
711
})
712
713
DATAmut=data.frame(t(costim_feats[grepl("GNAB|CNVR", rownames(costim_feats)),]))
714
715
DATA=data.frame("HLAII"=scale(as.numeric(fm["N:SAMP:HLAIIScore",])), "CytolyticScore"=scale(as.numeric(fm["N:SAMP:CytolyticScore",])), stringsAsFactors = F)
716
717
logicalv=list("Reddy_DLBCL"=!is.na(data[1,]), "Reddy_DLBCL_ABC"=!is.na(data[1,])&annot$ABC.GCB..RNAseq.=="ABC", "Reddy_DLBCL_GCB"=!is.na(data[1,])&annot$ABC.GCB..RNAseq.=="GCB")
718
719
Reddy_DLBCL_res=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
720
Reddy_DLBCL_multivar=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = F, pretty=F))
721
722
# Clinical
723
DATA_clin=data.frame("IPI_0to1"=annot$IPI%in%c(0,1)*1,"IPI_4to5"=annot$IPI%in%c(4,5)*1, "ABC"=(annot$ABC.GCB..RNAseq.=="ABC")*1 ,"GCB"=(annot$ABC.GCB..RNAseq.=="GCB")*1,  stringsAsFactors = F)
724
Reddy_DLBCL_clin=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_clin,TIME,STATUS, univariate = T, pretty=F))
725
726
# immuno-editing:
727
Reddy_DLBCL_immunoediting=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATAmut,TIME,STATUS, univariate = T, pretty=F))
728
Reddy_DLBCL_immunoediting$Feature=gsub("B.GNAB.", "MUT:", Reddy_DLBCL_immunoediting$Feature)
729
Reddy_DLBCL_immunoediting$Feature=gsub("HLA.", "HLA-", Reddy_DLBCL_immunoediting$Feature)
730
Reddy_DLBCL_immunoediting$Feature=gsub("N.CNVR.", "", Reddy_DLBCL_immunoediting$Feature)
731
732
# costim
733
DATA=data.frame(scale(t(data[rownames(data)%in%c(co.stim[,1]),])))
734
Reddy_DLBCL_costim=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
735
736
# microenvironment
737
DATA=data.frame(scale(t(data[rownames(data)%in%c(ME[,1]),])))
738
Reddy_DLBCL_microenvironment=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
739
740
# make table S6, adjusted p-value set here to correct for number of comparisons in total:
741
tableS7=rbind(Reddy_DLBCL_res,Reddy_DLBCL_clin[!is.na(Reddy_DLBCL_clin$`exp(coef)`),], Reddy_DLBCL_immunoediting, Reddy_DLBCL_costim, Reddy_DLBCL_microenvironment) 
742
tableS7=tableS7[tableS7$Name%in%"Reddy_DLBCL",]
743
tableS7$Adj.P=p.adjust(tableS7$P, method="BH")
744
745
746
tableS7[,2]=prettyNum(tableS7[,2])
747
tableS7[,3]=prettyNum(tableS7[,3])
748
tableS7[,4]=prettyNum(tableS7[,4])
749
tableS7[,5]=prettyNum(tableS7[,5])
750
tableS7[,6]=prettyNum(tableS7[,6])
751
tableS7[,8]=prettyNum(tableS7[,8])
752
753
# annotate these genes, needed later:
754
genelist_signif=data.frame(tableS7[,1], type="Clinical", stringsAsFactors = F)
755
genelist_signif$type[genelist_signif[,1]%in%c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")]="ImmunoScores"
756
genelist_signif$type[genelist_signif[,1]%in%Reddy_DLBCL_immunoediting$Feature]="Immune Editing mutation"
757
genelist_signif$type[genelist_signif[,1]%in%cga]="CGA"
758
genelist_signif$type[tableS7$Feature%in%c("ABC", "GCB")]="Subtype"
759
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho > 0)",1]]="Stromal/cancer gene (Rho > 0)"
760
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho < 0)",1]]="Stromal/cancer gene (Rho < 0)"
761
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"CTL/NK gene",1]]="CTL/NK gene"
762
genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Inhibitory", co.stim$`Immune checkpoint function`),1]]="Inhibitory ligand"
763
genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Stimulatory", co.stim$`Immune checkpoint function`),1]]="Stimulatory ligand"
764
765
tableS7$Type=genelist_signif$type
766
tableS7=tableS7[order(tableS7$Type),]
767
tableS7[,1]=gsub("\\.", "-", tableS7[,1])
768
769
data.table::fwrite(tableS7[,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Reddy_DLBCL.tsv", sep="\t")
770
data.table::fwrite(tableS7[tableS7$Adj.P<0.2,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Reddy_DLBCL_signif.tsv", sep="\t")
771
772
# save cohort and survival
773
gexp=data.frame(scale(t(data)))
774
immunoscore=data.frame("HLAII"=scale(as.numeric(fm["N:SAMP:HLAIIScore",])), "CytolyticScore"=scale(as.numeric(fm["N:SAMP:CytolyticScore",])), stringsAsFactors = F)
775
776
samp=DATA_clin
777
778
save(list = c("gexp","immunoscore", "samp", "TIME", "STATUS", "logicalv"), file="Reddy_DLBCL_survival_data.Rdata")
779
780
781
#************************************ beatAML:
782
load("BeatAML_fm.Rdata")
783
annot=get(load("BeatAML_fm_annot.Rdata"))
784
785
gexp=fm[grepl("GEXP", rownames(fm)),]
786
rownames(gexp)=gsub("N:GEXP:", "", rownames(gexp))
787
788
annot$vitalStatus[annot$vitalStatus=="Unknown"]=NA
789
TIME=as.numeric(annot$overallSurvival)
790
STATUS=as.numeric(annot$vitalStatus=="Dead")
791
792
STATUS[TIME>1825&!is.na(STATUS)&STATUS==1]=0 # change to 5year survival
793
TIME[TIME>1825]=1825
794
TIME[TIME==0]=0.1
795
796
filtv=annot$specimenType%in%"Bone Marrow Aspirate"&!(is.na(TIME)|is.na(STATUS)|is.na(annot$TCGA_coord))&annot$causeOfDeath%in%c("Alive", "Dead-Disease", "Dead-Unknown")
797
filtv=annot$specimenType%in%"Bone Marrow Aspirate"&!(is.na(annot$TCGA_coord))&annot$causeOfDeath%in%c("Alive", "Dead-Disease", "Dead-Unknown")
798
filtv2=annot$specimenType%in%"Bone Marrow Aspirate"&!(is.na(annot$TCGA_coord))&annot$TCGA_coord%in%c("CMP-like","MDS-like","Monocyte-like")&annot$causeOfDeath%in%c("Alive", "Dead-Disease", "Dead-Unknown")
799
filtv3=annot$specimenType%in%"Bone Marrow Aspirate"&!(is.na(annot$TCGA_coord))&annot$TCGA_coord%in%c("MDS-like")&annot$causeOfDeath%in%c("Alive", "Dead-Disease", "Dead-Unknown")
800
filtv4=annot$specimenType%in%"Bone Marrow Aspirate"&!(is.na(annot$TCGA_coord))&annot$TCGA_coord%in%c("CMP-like")&annot$causeOfDeath%in%c("Alive", "Dead-Disease", "Dead-Unknown")
801
filtv5=annot$specimenType%in%"Bone Marrow Aspirate"&!(is.na(annot$TCGA_coord))&annot$TCGA_coord%in%c("Monocyte-like")&annot$causeOfDeath%in%c("Alive", "Dead-Disease", "Dead-Unknown")
802
803
logicalv=list("BeatAML"=!(is.na(TIME)|is.na(STATUS)|is.na(annot$TCGA_coord))&annot$causeOfDeath%in%c("Alive", "Dead-Disease", "Dead-Unknown"), "beatAML_BMonly"=filtv, "normal_karyotype"=filtv2, "MDS-like"=filtv3,  "CMP-like"=filtv4,  "Monocyte-like"=filtv5)
804
805
# plot overall survival
806
r=lapply(seq(logicalv), function(i){
807
  ggsurvplot(survfit(Surv(time, status) ~ 1, data = data.frame("time"=TIME[logicalv[[i]]], "status"=STATUS[logicalv[[i]]])), 
808
             xlab = "months", 
809
             ylab = "Overall survival probability",title=(names(logicalv)[i])
810
  )
811
})
812
813
names(r)=names(logicalv)
814
815
pdf("BeatAML_cohorts.pdf", width =5, height = ceiling(length(r)/2)*2.75)
816
plots.together=arrange_ggsurvplots(r, print = TRUE, ncol = 2, nrow = ceiling(length(r)/2))
817
dev.off()
818
819
data.test=data.frame("time"=TIME[logicalv[[1]]]*0.0328767, "status"=STATUS[logicalv[[1]]])
820
821
ggsurvplot(survfit(Surv(time, status) ~ 1, data = data.test), 
822
           xlab = "months", 
823
           ylab = "Overall survival probability")
824
825
# HLA, cytolytic
826
DATA=data.frame(scale(t(fm[grepl("N:SAMP:.*.Score", rownames(fm)),])), check.names = F)
827
colnames(DATA)=c("HLAI", "HLAII", "CytolyticScore")
828
beatAML=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
829
830
# Clinical:
831
DATA_clin=data.frame(t(fm[grepl("ELN2017", rownames(fm)),]),"Age"=as.numeric(fm[grepl("ageAtDiagnosis", rownames(fm)),,drop=F]), t(fm[grepl("BM_Transplant", rownames(fm)),,drop=F]),t(fm[grepl("B:CLIN:priorMDS_TRUE", rownames(fm)),,drop=F]), t(fm[grepl("in_PB|in_BM|B:CLIN:is|finalFusion_Complex", rownames(fm)),,drop=F]))
832
DATA_clin$Age[is.na(DATA_clin$Age)]=median(DATA_clin$Age, na.rm = T) #one observation --> set to median
833
834
colnames(DATA_clin)=gsub("..CLIN.", "", colnames(DATA_clin))
835
beatAML_clin=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_clin,TIME,STATUS, univariate = T, pretty=F))
836
837
# stroma
838
ME=microenv[microenv$disease=="AML",]
839
DATA=data.frame(t(gexp[rownames(gexp)%in%ME$gene,]))
840
beatAML_microenv=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
841
beatAML_microenv[beatAML_microenv$P<0.01,]
842
843
# MDS
844
DATA=data.frame(t(gexp[rownames(gexp)%in%MDS$MDS_signature_all_filt,]))
845
beatAML_MDS=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
846
beatAML_MDS[beatAML_MDS$P<0.05,]
847
848
# costim
849
DATA=data.frame(t(gexp[rownames(gexp)%in%co.stim[,1],]))
850
beatAML_costim=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F))
851
beatAML_costim[beatAML_costim$P<0.05,]
852
853
# subtype:
854
load("BeatAML_subtypes.Rdata")
855
coordinates.subtype$subtype[coordinates.subtype$subtype%in%"Progenitor-like"]="CMP-like"
856
coordinates.subtype=coordinates.subtype[match(colnames(fm), coordinates.subtype$ID),]
857
858
DATA_subtypes=data.frame(do.call(cbind, get.logical(list(coordinates.subtype$subtype)))*1)
859
860
beataml_res_subtype=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_subtypes,TIME,STATUS, univariate = T, pretty=F))
861
862
fun.kapplanMeier(TIME[logicalv[[1]]], STATUS[logicalv[[1]]],GROUPS=coordinates.subtype$subtype[logicalv[[1]]], MONTHS=T, PVAL=1, INDIVIDUAL_GROUPS=F,LWD = 1, NAME = "Prognostic Index - validation")
863
864
# make table S6, adjusted p-value set here to correct for number of comparisons in total:
865
tableS7=rbind(beatAML, beataml_res_subtype, beatAML_costim, beatAML_MDS, beatAML_microenv)
866
tableS7=tableS7[tableS7$Name%in%"BeatAML",]
867
tableS7$Adj.P=p.adjust(tableS7$P, method="BH")
868
869
tableS7[,2]=prettyNum(tableS7[,2])
870
tableS7[,3]=prettyNum(tableS7[,3])
871
tableS7[,4]=prettyNum(tableS7[,4])
872
tableS7[,5]=prettyNum(tableS7[,5])
873
tableS7[,6]=prettyNum(tableS7[,6])
874
tableS7[,8]=prettyNum(tableS7[,8])
875
876
# annotate these genes, needed later:
877
genelist_signif=data.frame(tableS7[,1], type="", stringsAsFactors = F)
878
genelist_signif$type[genelist_signif[,1]%in%c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")]="ImmunoScores"
879
genelist_signif$type[genelist_signif[,1]%in%beataml_res_subtype$Feature]="Subtype"
880
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho > 0)",1]]="Stromal/cancer gene (Rho > 0)"
881
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho < 0)",1]]="Stromal/cancer gene (Rho < 0)"
882
genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"CTL/NK gene",1]]="CTL/NK gene"
883
genelist_signif$type[genelist_signif[,1]%in%MDS$MDS_signature_all_filt]="MDS-signature gene"
884
genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Inhibitory", co.stim$`Immune checkpoint function`),1]]="Inhibitory ligand"
885
genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Stimulatory", co.stim$`Immune checkpoint function`),1]]="Stimulatory ligand"
886
887
tableS7$Type=genelist_signif$type
888
tableS7=tableS7[order(tableS7$Type),]
889
tableS7[,1]=gsub("\\.", "-", tableS7[,1])
890
891
data.table::fwrite(tableS7[,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_beatAML.tsv", sep="\t")
892
data.table::fwrite(tableS7[tableS7$Adj.P<0.2,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_beatAML_signif.tsv", sep="\t")
893
894
# save cohort and survival
895
gexp=data.frame(scale(t(gexp)))
896
immunoscore=data.frame(scale(t(fm[grepl("N:SAMP:.*.Score", rownames(fm)),])), check.names = F)
897
colnames(immunoscore)=c("HLAI", "HLAII", "CytolyticScore")
898
899
samp=cbind(DATA_clin, DATA_subtypes)
900
901
save(list = c("gexp","immunoscore", "samp", "TIME", "STATUS", "logicalv"), file="BeatAML_survival_data.Rdata")
902
903
#******************************** make excel table
904
files=list.files(pattern = "tableS7")
905
files=files[!grepl("_CHOP", files)]
906
907
univariate.results=lapply(files[grepl("signif", files)], fread, data.table=F)
908
names(univariate.results)=gsub("tableS7_|_signif.tsv", "", files[grepl("signif", files)])
909
univariate.results=univariate.results[c(grep("DLBCL", names(univariate.results)), grep("MM", names(univariate.results)), grep("AML", names(univariate.results)))]
910
911
require(openxlsx)
912
913
write.xlsx(univariate.results, file = "TableS7_cox.xlsx", )