a b/preprocessing/Preprocessing_Hemap_featurematrix_generation.R
1
#***************************************************************************************************
2
#******************************* Make immunology FM **********************************************
3
#***************************************************************************************************
4
library(mclust)
5
library(data.table)
6
library(parallel)
7
8
source("/research/users/ppolonen/git_home/common_scripts/featurematrix/functions_generate_fm.R")
9
10
# Working directory: every relative path used below resolves against it.
setwd("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/data/")

# Expression data: load the stored object and transpose it.
gexp <- t(get(load("data9544_with_gene_symbols.RData")))

# Annotation tables. `check` is an earlier annotation object (in the visible
# part of the script it is only compared against sample identifiers); `annot`
# is the sample annotation table that the script filters and extends.
check <- get(load("Hemap_immunology_Annotations_8304.Rdata"))
annot <- read.delim(
  "anno_coord_data9544_15pct_bw2.5_updated.txt",
  header = TRUE,
  stringsAsFactors = FALSE
)
19
20
# listing files, not needed anymore
21
# f=list.files("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/Annotations_immunology/", pattern = ".csv", full.names = T)
22
# f=f[-1]
23
# annot_normal=do.call(rbind, lapply(f, read.csv, header=F, stringsAsFactors=F, skip=1))
24
25
#******************************** filtering *************************************
26
# Ex vivo treatment labels tolerated for NonCancerHealthy samples.
# NOTE(review): " differentiation followed by LPS+IFNg" starts with a space;
# kept verbatim -- confirm it matches the annotation text.
exvivotreatments_allowed <- c(
  "none", "na", "control", "activation", "differentiation",
  "differentiation followed by activation",
  " differentiation followed by LPS+IFNg",
  "differentiation followed by IFN",
  "differentiation followed by inflammatory cytokines",
  "differentiation followed by LPS",
  "differentiation followed by CD40L",
  "differentiation followed by Poly(I:C)",
  "differentiation (IL-4)", "differentiation with EPO",
  "anti-IgM", "IL-2", "IL-3", "IL-3+CpG"
)

# The masks below are built from `annot` rows and applied to `gexp` columns,
# so both must describe the same number of samples.
stopifnot(nrow(annot) == ncol(gexp))

# One logical mask per exclusion rule. `%in%` is used instead of `==` so that a
# missing annotation value yields FALSE/TRUE rather than NA in the mask.
rm_healthy <- annot$Sample.type %in% c("NonCancerHealthy") &
  !(annot$In.vivo.treatment %in% c("na", "none", "no") &
      annot$Ex.vivo.treatment %in% exvivotreatments_allowed)
rm_cancer_prolif <- annot$Sample.type %in% c("Cancer", "Prolif") &
  !annot$Ex.vivo.treatment %in% "none"
rm_celline <- annot$Sample.type %in% c("CellLine") &
  !annot$Ex.vivo.treatment %in% c("none", "na", "control")
rm_treatments <- annot$GSE.identifier..experiment. %in% c("GSE26661") |
  annot$GSM.identifier..sample. %in% c("GSM425497", "GSM425499")
rm_noncancer <- !annot$Sample.type %in%
  c("Cancer", "Prolif", "CellLine", "NonCancerHealthy")
rm_mm_pbmc <- annot$colorClass %in% "MM" &
  !annot$Sample.isolation %in% "CD138+ plasma cells"

# cluster exclusion
# AML,GSE7538,24
# CLL,GSE18866,GSE9250,32
# LP,GSE12453,GSE7345,6
# MM,GSE24147,GSE24522,18
# MP,GSE12079,GSE15811,13
# TCL,GSE14879,16 microdissected, do not remove
rm_cluster_differ <- annot$GSE.identifier..experiment. %in%
  c("GSE7538", "GSE9250", "GSE12453", "GSE18866", "GSE24522",
    "GSE7345", "GSE12079", "GSE15811", "GSE24147") &
  annot$Sample.type %in% "Cancer"

# Single definition of "removed", reused for the report and both filters.
rm_any <- rm_healthy | rm_cancer_prolif | rm_celline | rm_treatments |
  rm_cluster_differ | rm_noncancer | rm_mm_pbmc

# Report of excluded samples; when several rules hit, the last assignment wins.
annot_left_out <- annot
annot_left_out$reason_removed <- NA_character_
annot_left_out$reason_removed[rm_healthy] <- "Normal Cell sample, ex-vivo or in-vivo treated"
annot_left_out$reason_removed[rm_cluster_differ] <- "Cancer sample, outlier cluster"
annot_left_out$reason_removed[rm_celline] <- "Cell line sample, ex-vivo treated"
annot_left_out$reason_removed[rm_cancer_prolif | rm_treatments] <- "Cancer sample, ex-vivo treated"
annot_left_out$reason_removed[rm_noncancer] <- "NonCancer sample, not healthy"
# NOTE(review): the MM rule reuses the NonCancer reason text -- confirm wording.
annot_left_out$reason_removed[rm_mm_pbmc] <- "NonCancer sample, not healthy"

annot_left_out <- annot_left_out[rm_any, ]
fwrite(annot_left_out, "hemap_1072_leftout_reasonremoved.txt", sep = "\t")

# Apply the same mask to annotation rows and expression columns.
annot <- annot[!rm_any, ]
gexp <- gexp[, !rm_any]

# Samples that are absent from the earlier annotation object.
newSamples <- annot[!annot[, 1] %in% check[, 1], ]
fwrite(newSamples, "new_samples_in_hemap_8472.txt", sep = "\t")

# Interactive check: dimensions of the earlier annotation rows still present.
dim(check[check[, 1] %in% annot[, 1], ])
63
64
65
#****************************************************************************************
66
# annot[rm_healthy&annot$GSM.identifier..sample.%in%annot_normal[,1],] # excluded from previous normals
67
68
# new
# Simplify CLASS labels of healthy samples by stripping prefixes/suffixes.
is_healthy <- annot$Sample.type %in% c("NonCancerHealthy")
healthy_class <- gsub(
  "NonCancerHealthy_|NonCancerHealthy|na_", "",
  gsub(
    "_StemCell_|Myeloid_|_na_na|1_na_na|2_na_na|Lymphoid_|_G.*.|_na_G0|_na_G1.*.|T-|B-|M1-Differentiating|Erythroid_",
    "",
    annot$CLASS[is_healthy]
  )
)

# Remember which healthy samples were left with the bare label "na" BEFORE it
# is overwritten: the colorClass update below needs the same samples. (Testing
# CLASS == "na" after the overwrite can never match.)
healthy_unnamed <- healthy_class %in% "na"
healthy_class[healthy_unnamed] <- "LymphNode_GerminalCentre"
annot$CLASS[is_healthy] <- healthy_class

annot$CLASS[annot$CLASS %in% "CD4+Tcell"] <- "RestingCD4+Tcell"
annot$CLASS[annot$CLASS %in% "CD8+Tcell"] <- "RestingCD8+Tcell"

annot$MAINCLASS[is_healthy] <- "NonCancerHealthy"
annot$colorClass[is_healthy][healthy_unnamed] <- "Lymphoid"
76
77
# old
78
# annot$CLASS[match(annot_normal[,1], annot$GSM.identifier..sample.)]=annot_normal[,5]
79
# annot$MAINCLASS[match(annot_normal[,1], annot$GSM.identifier..sample.)]=annot_normal[,3]
80
# annot$colorClass[match(annot_normal[,1], annot$GSM.identifier..sample.)]=annot_normal[,4]
81
82
#*********************************** compute geometric mean ********************************
83
# Prefix expression rows with the feature-matrix type tag.
rownames(gexp) <- paste0("N:GEXP:", rownames(gexp))

# Return the gexp rows for the given gene symbols (exact name match, always a
# matrix). Warns when a requested gene is absent so that a score is not
# silently computed from fewer genes.
pick_gene_rows <- function(symbols) {
  wanted <- paste0("N:GEXP:", symbols)
  absent <- setdiff(wanted, rownames(gexp))
  if (length(absent) > 0) {
    warning("Genes missing from gexp: ", paste(absent, collapse = ", "),
            call. = FALSE)
  }
  gexp[rownames(gexp) %in% wanted, , drop = FALSE]
}

# log2 of the per-sample (per-column) gm_mean of `linear_expr`, returned as a
# one-row matrix named `label`. gm_mean comes from the sourced helper file.
log2_geomean_score <- function(linear_expr, label) {
  score <- log2(t(apply(linear_expr, 2, gm_mean)))
  rownames(score) <- label
  score
}

# Cytolytic score. Genes are matched by exact name, like the HLA sets below,
# so a symbol that merely contains e.g. "PRF1" cannot leak into the score.
dat_a <- pick_gene_rows(c("GZMA", "PRF1", "GNLY", "GZMH", "GZMM"))
dat <- 2^dat_a + 0.01
gm1 <- log2_geomean_score(dat, "CytolyticScore")

# HLA class I score
dat_a2 <- pick_gene_rows(c("B2M", "HLA-A", "HLA-B", "HLA-C"))
dat2 <- 2^dat_a2 + 0.01
gm2 <- log2_geomean_score(dat2, "HLAIScore")

# HLA class II score
dat_a3 <- pick_gene_rows(c("HLA-DMA", "HLA-DMB", "HLA-DPA1",
                           "HLA-DPB1", "HLA-DRA", "HLA-DRB1"))
dat3 <- 2^dat_a3 + 0.01
gm3 <- log2_geomean_score(dat3, "HLAIIScore")
113
114
# Turn a one-row score matrix into a one-row data frame of "low" / "medium" /
# "high" calls: scores are z-scaled across samples, z >= 1 is "high" and
# z <= -1 is "low". Columns are named after the gexp samples.
label_by_zscore <- function(score, label) {
  calls <- data.frame(t(rep("medium", length(score))), stringsAsFactors = FALSE)
  z <- as.numeric(scale(t(score)))
  calls[z >= 1] <- "high"
  calls[z <= (-1)] <- "low"
  rownames(calls) <- label
  colnames(calls) <- colnames(gexp)
  calls
}

classification1 <- label_by_zscore(gm1, "CytolyticScore")
classification2 <- label_by_zscore(gm2, "HLAIScore")
classification3 <- label_by_zscore(gm3, "HLAIIScore")

# Samples in rows, one column per score.
classification <- data.frame(
  t(rbind(classification1, classification2, classification3)),
  stringsAsFactors = FALSE
)

# Numeric scores as features.
immunoscores <- as.data.frame(t(rbind(gm1, gm2, gm3)))
immunoscoresfm <- make.features(immunoscores, datatype = "SAMP", prefix = "")
colnames(immunoscoresfm) <- colnames(gexp)

# Categorical calls as features.
immunoscores_class_fm <- make.features(classification, datatype = "SAMP", prefix = "")
colnames(immunoscores_class_fm) <- colnames(gexp)

# Original note: "excluding categorical here, they slow everything down!"
# NOTE(review): immunoscores_class_fm is nevertheless part of the list below.
l.data_list <- list(gexp, immunoscoresfm, immunoscores_class_fm)
data_list <- data.frame(do.call(rbind, l.data_list))
148
149
# ******************************** Infer cell fractions ***********************************
150
151
# Reorder the columns of `feature_matrix` so they follow colnames(gexp).
# The feature blocks are later row-bound by position, so merely filtering
# with %in% is not enough: a different sample order in the input file would
# silently attach values to the wrong samples. Missing samples are an error
# (the positional bind further down would fail on them anyway).
align_to_gexp <- function(feature_matrix, source_label) {
  idx <- match(colnames(gexp), colnames(feature_matrix))
  if (anyNA(idx)) {
    stop(sum(is.na(idx)), " gexp samples are missing from ", source_label,
         call. = FALSE)
  }
  feature_matrix[, idx, drop = FALSE]
}

# cibersort: samples are rows in the file, so rename columns then transpose.
results <- read.delim("CIBERSORT-Results.txt", row.names = 1,
                      header = TRUE, stringsAsFactors = FALSE)
colnames(results) <- paste0("N:SAMP:CIBERSORT_",
                            gsub(" |-|\\.", "_", colnames(results)), "")
cibersort <- align_to_gexp(t(results), "CIBERSORT-Results.txt")

# MCP counter: features are already in rows.
MCP <- get(load("MCP_counter_data.Rdata"))
rownames(MCP) <- paste0("N:SAMP:", rownames(MCP), "")
MCP <- align_to_gexp(MCP, "MCP_counter_data.Rdata")

l.fractions <- list(cibersort, MCP)
fractions <- data.frame(do.call(rbind, l.fractions), stringsAsFactors = FALSE)
163
164
#********************************** Clinical data ******************************
165
166
# Read every *.info.tsv table in the working directory and stack them.
files <- list.files(".", pattern = ".info.tsv", full.names = TRUE)
surv_data <- do.call(
  rbind,
  lapply(files, read.delim, header = TRUE, stringsAsFactors = FALSE)
)

# Keep samples present in gexp that have a value in the third column.
surv_data <- surv_data[surv_data[, 1] %in% colnames(gexp), ]
surv_data <- surv_data[!is.na(surv_data[, 3]), ]

# Columns 2 and 3 become the two survival feature rows.
surv_d <- t(surv_data[, 2:3])
colnames(surv_d) <- surv_data[, 1]
rownames(surv_d) <- c("N:CLIN:OS_Time", "B:CLIN:OS_Status")

# Expand to the gexp sample order; samples without data become NA columns.
surv_d <- surv_d[, match(colnames(gexp), colnames(surv_d))]
colnames(surv_d) <- colnames(gexp)
178
179
#******************************* tumor percentage ********************************
180
Sys.setlocale(locale = "C")

# Table of (sample isolation, colorClass) pairs that count as sorted cells.
sorteds <- read.delim("sorted_samples.txt", stringsAsFactors = FALSE, header = FALSE)

# seq_len() instead of 1:n so that an empty table yields no iterations.
gsm <- unlist(lapply(seq_len(nrow(sorteds)), function(i) {
  annot$GSM.identifier..sample.[
    annot$Sample.isolation %in% sorteds[i, 1] &
      annot$colorClass %in% sorteds[i, 2]
  ]
}))
add <- annot$GSM.identifier..sample.[grepl("CD303", annot$Sample.isolation)]

sorted <- t(annot$GSM.identifier..sample. %in% c(gsm, add))
rownames(sorted) <- "B:CLIN:CELLS_SORTED"
colnames(sorted) <- colnames(gexp)

# Samples whose notes cite the "Klin. Padiatr." sorting protocol are flagged too.
sorted[, grepl("Padiatr", annot$Additional.notes)] <- 1

# Strip the free-text decorations around the purity value.
tumor_per <- gsub(
  "blast%: |>=|%|blast cell percentage: |t_cell_purity: |>|;|blast count, % of sample, -1=unavailable : ",
  "",
  annot$Purity.Tumor.cell.content
)
tumor_per[tumor_per == "high"] <- 80
# Non-numeric entries give NA here (with a coercion warning) and are skipped.
tumor_per[as.numeric(tumor_per) > 100] <- 100
tumor_per[tumor_per == "-1"] <- 0
tumor_per[tumor_per %in% c("n./a.", "na")] <- NA

# MALT annotations, expanded to the gexp sample order.
malt <- read.delim("clinical_annotations_MALT.txt", stringsAsFactors = FALSE, header = TRUE)
replace <- malt[match(colnames(gexp), malt$GSMID), ]
from_malt <- grepl("Percentage of tumor", tumor_per)
tumor_per[from_malt] <- replace$X..Tumor[from_malt]

tumor_percentage <- data.matrix(t(as.numeric(tumor_per)))
rownames(tumor_percentage) <- "N:SAMP:BLAST_TUMOR_PERCENTAGE"
colnames(tumor_percentage) <- colnames(gexp)

# T-cell percentages
T_per <- data.matrix(t(as.numeric(replace$X..T.cells)))
rownames(T_per) <- "N:SAMP:TCELL_PERCENTAGE"
colnames(T_per) <- colnames(gexp)

tissue_per <- data.matrix(t(as.numeric(replace$X..Lung)))
rownames(tissue_per) <- "N:SAMP:TISSUE_PERCENTAGE"
colnames(tissue_per) <- colnames(gexp)
218
219
220
#*************************** adding some lymphoma annotation *************************************
221
# GSE10846 annotations, reduced to known samples and expanded to annot order.
anno <- read.delim("GSE10846_series_matrix_info_ipi_clean.txt",
                   header = TRUE, stringsAsFactors = FALSE)
anno <- anno[anno[, 1] %in% annot[, 1], ]
anno <- anno[match(annot[, 1], anno[, 1]), ]

# Rows of annot that have a matching record in anno.
has_anno <- annot[, 1] %in% anno[, 1]

# Overwrite the treatment text for those samples with the cleaned chemotherapy
# entry; the literal string "NA" is turned into a real missing value.
annot$In.vivo.treatment[has_anno] <- gsub("*.*: |;", "", anno$chemotherapy[has_anno])
annot$In.vivo.treatment[annot$In.vivo.treatment %in% "NA"] <- NA

annot$dlbcl_ipi <- NA
annot$dlbcl_ipi[has_anno] <- anno$ipi[has_anno]

# 0/1 indicator rows for the two regimens; NA where the treatment is missing.
treatment_missing <- is.na(annot$In.vivo.treatment)
CHOP <- t((annot$In.vivo.treatment %in% "CHOP-Like Regimen") * 1)
RCHOP <- t((annot$In.vivo.treatment %in% "R-CHOP-Like Regimen") * 1)
CHOP[treatment_missing] <- NA
RCHOP[treatment_missing] <- NA
rownames(CHOP) <- "B:CLIN:Chemotherapy_CHOP"
rownames(RCHOP) <- "B:CLIN:Chemotherapy_RCHOP"
colnames(CHOP) <- colnames(gexp)
colnames(RCHOP) <- colnames(gexp)
240
241
# add cytogenetic information
genetics_org <- read.delim("AML_preBALL_cytogenetics_vectors.txt",
                           stringsAsFactors = FALSE, header = TRUE, row.names = 1)
# Put the rows in gexp sample order (samples without a record become NA rows).
# Filtering with %in% alone keeps the file's order, which is unsafe because the
# clinical blocks are later row-bound by column position.
genetics <- genetics_org[match(colnames(gexp), rownames(genetics_org)), , drop = FALSE]
genetics <- t(genetics) * 1
colnames(genetics) <- colnames(gexp)
rownames(genetics) <- paste("B:CLIN:", "GENETICS_", rownames(genetics), "", sep = "")

# Free-text cytogenetics: blank-like and uninformative entries become NA,
# then a few broad categories are collapsed.
cytogenetic <- annot$Cytogenetics
cytogenetic[cytogenetic %in% c("na", "n/a", "", " ")] <- NA
cytogenetic[grepl("without|unknown|remainingcytogenetics|no del13q|crlf2 fish: Normal|crlf2 fish: n/a", cytogenetic)] <- NA
cytogenetic[grepl("ormal", cytogenetic)] <- "normal_karyotype"
cytogenetic[grepl("MLL", cytogenetic)] <- "MLL"

# Ordered text clean-up rules (pattern, replacement); applied top to bottom.
cleanup_rules <- rbind(
  c("complex aberrant karyotype", "complex karyotype"),
  c("hyperdiploid karyotype", "hyperdiploid"),
  c("TAL$", "TAL1"),
  c("remaining cytogenetics|other abNormalities", "other"),
  c("fish:|trisomy 8 |;deletion|; complex karyotype|, complex karyotype|: positive| chromosomal aberrations|/API2-MALT1|/API2-MALT1 negative|deletion *.*: negative|/IGH-MALT1|, plus other| plus other", ""),
  c("trisomy ", "trisomy"),
  c("TEL deleted", "TEL_deleted"),
  c("p13.1", "p13"),
  c(";$", ""),
  c("\\+ ", "+"),
  c("i\\(", "inv("),
  c("complex karyotype", "complex_karyotype"),
  c("^ ", "")
)
for (k in seq_len(nrow(cleanup_rules))) {
  cytogenetic <- gsub(cleanup_rules[k, 1], cleanup_rules[k, 2], cytogenetic)
}
annot$cytogenetic_clean <- cytogenetic

# Tokenise the cleaned text. (An earlier space-only split was overwritten
# immediately and has been removed.)
cytogenetic_terms <- unlist(strsplit(cytogenetic, "; |; |  |, | |/"))
cytogenetic_terms <- gsub(" ", "", cytogenetic_terms)

# Keep terms that occur more than five times.
terms <- table(cytogenetic_terms)
shared <- names(terms)[terms > 5]

# One row per shared term, built by the sourced FIND_LOGICAL helper.
cytogenetics <- do.call(rbind, mclapply(shared, FIND_LOGICAL, cytogenetic, mc.cores = 8))
colnames(cytogenetics) <- colnames(gexp)
277
#***********************
278
279
# Age: strip labels/units, convert month values to years, map the coarse
# text categories to fixed numbers, and discard values above 100.
age <- gsub("*.*: |^ |;| yr| age| years|-.*.$| Years|d 32.8|d 54.8", "", annot$Age)
age[age %in% c("na", "n/a", "", "not available")] <- NA
in_months <- grepl("month|Month", age)
age[in_months] <- signif(
  as.numeric(gsub(" months.*.| months| Months", "", age[in_months])) / 12,
  2
)
age[age == "Adult"] <- 30
age[age == "Children"] <- 5
age[age == "pediatric"] <- 1
age <- t(as.numeric(age))
age[age > 100] <- NA
rownames(age) <- "N:CLIN:AGE"
colnames(age) <- colnames(gexp)

# Gender: upper-case, drop "not available"-style entries, strip labels, then
# collapse the remaining spellings to "female" / "male".
gender <- toupper(annot$Gender)
gender[gender %in% c("GENDER: NOT AVAILABLE", "GENDER: NA;", "SEX: UNKNOWN;")] <- NA
gender <- gsub(" |;", "", gender)
gender <- gsub("GENDER:|SEX/AGE:|/.*.|SEX:", "", gender)
gender[gender %in% c("F", "FEMALE", "WOMAN")] <- "female"
gender[gender %in% c("M", "MALE", "MAN")] <- "male"
gender <- t(gender)
rownames(gender) <- "C:CLIN:GENDER"
colnames(gender) <- colnames(gexp)

# Race: upper-case, then map free text to a small set of groups; the rules
# run in order and anything outside the listed groups ends up as NA.
race <- toupper(annot$Race)
race[grepl("AGE", race)] <- NA
race[grepl("OTHER", race)] <- "OTHER"
race[grepl("AFRICAN|RACE: AA;|RACE: B;", race)] <- "AFRICAN"
race[grepl("HISPANIC|RACE: H; ", race)] <- "HISPANIC"
race[grepl("EUROPEAN|CAUCASIAN|ANGLO-AMERICAN|WHITE|RACE: W;|RACE: C;", race)] <- "EUROPEAN"
race[grepl("ASIAN", race)] <- "ASIAN"
race[!grepl("ASIAN|AFRICAN|HISPANIC|EUROPEAN|OTHER", race)] <- NA

race <- t(race)
rownames(race) <- "C:CLIN:RACE"
colnames(race) <- colnames(gexp)
315
316
#*************************** adding some myeloma annotation *************************************
317
# Two myeloma clinical tables (read.delim2: decimal comma).
annomm <- read.delim2("GSE24080_MM_clininfo_GSMid_clean.txt",
                      header = TRUE, stringsAsFactors = FALSE)
annomm2 <- read.delim2("GSE19784_MM_survival_GSMid_iss_clean.txt",
                       header = TRUE, stringsAsFactors = FALSE)

# Make the text columns numeric: "<0.5" is taken as 0.5 and decimal commas
# are replaced by points.
annomm$b2m <- gsub("<0.5", "0.5", annomm$b2m)
annomm$b2m <- as.numeric(gsub(",", ".", annomm$b2m))
annomm$aspc <- as.numeric(gsub(",", ".", annomm$aspc))
annomm$bmpc <- as.numeric(gsub(",", ".", annomm$bmpc))

# Stack both tables (columns filled where absent; data.table is attached at
# the top of the script) and expand to the gexp sample order.
combmm <- data.frame(rbindlist(list(annomm, annomm2), fill = TRUE),
                     stringsAsFactors = FALSE)
combmm <- combmm[match(colnames(gexp), combmm$accession), ]

# Samples that have a myeloma record.
is_mm <- colnames(gexp) %in% combmm$accession

# Overwrite the generic clinical rows with the myeloma-specific values.
age[is_mm] <- signif(combmm$age[is_mm], 3)
gender[is_mm] <- combmm$sex[is_mm]

combmm$race[combmm$race %in% "other"] <- "OTHER"
combmm$race[combmm$race %in% "white"] <- "EUROPEAN"
race[is_mm] <- combmm$race[is_mm]

# Overall survival time and status.
surv_d[1, is_mm] <- signif(combmm$os_time[is_mm], 3)
surv_d[2, is_mm] <- signif(combmm$os_censor[is_mm], 3)

# Progression-free survival time and status.
pfs <- t(cbind(combmm$pfs_time, combmm$pfs_censor))
colnames(pfs) <- colnames(gexp)
rownames(pfs) <- c("N:CLIN:PFS_Time", "B:CLIN:PFS_Status")

# Remaining myeloma annotations, selected by column position:
# columns 4 and 8 as categorical features ...
otherMM_C <- t(combmm[, c(4, 8)])
rownames(otherMM_C) <- paste0("C:CLIN:MM_", toupper(rownames(otherMM_C)), "")
colnames(otherMM_C) <- colnames(gexp)

# ... and columns 9 to 20 as numeric features.
otherMM_N <- t(combmm[, c(9:20)])
rownames(otherMM_N) <- paste0("N:CLIN:MM_", toupper(rownames(otherMM_N)), "")
colnames(otherMM_N) <- colnames(gexp)

add_mm <- t(combmm$cytogenetic_abnormalities)
colnames(add_mm) <- colnames(gexp)
rownames(add_mm) <- c("B:CLIN:MM_CYTOGENETIC_ABNORMALITIES")

# All clinical feature rows, bound by column position.
l.clin <- list(sorted, surv_d, pfs, tumor_percentage, T_per, tissue_per,
               CHOP, RCHOP, gender, age, race, cytogenetics, genetics,
               add_mm, otherMM_N, otherMM_C)
clin <- data.frame(do.call(rbind, l.clin), stringsAsFactors = FALSE)
366
367
#****************************** make annotation clusters ******************************************
368
# Acute / chronic leukemia grouping from colorClass; NonCancer is applied last.
annot$acute <- rep("other", nrow(annot))
annot$acute[annot$colorClass %in% c("AML", "pre-B-ALL", "T-ALL")] <- "acute_leukemias"
annot$acute[annot$colorClass %in% c("CLL", "CML")] <- "chronic_leukemias"
annot$acute[grepl("NonCancer", annot$MAINCLASS)] <- "NonCancer"

annot$disease <- rep("other", nrow(annot))
annot$disease[grepl("NonCancer", annot$Sub.maps.available)] <- "NonCancer"

# CLASS2: copy of CLASS with every cell-line label collapsed to its group.
annot$CLASS2 <- annot$CLASS
for (cell_line_group in c("CellLine_Myeloma", "CellLine_Leukemia",
                          "CellLine_Lymphoma", "CellLine_mix")) {
  annot$CLASS2[grepl(cell_line_group, annot$CLASS2)] <- cell_line_group
}

# disease: the last term found in MAINCLASS wins.
findthese <- c("NonCancer", "Cancer_Leukemia", "Cancer_Myeloma", "Cancer_Lymphoma",
               "CellLine_Leukemia", "CellLine_Lymphoma", "CellLine_Myeloma",
               "Prolif_Lymphoproliferative_ALPS", "Prolif_Lymphoproliferative_MPN",
               "Prolif_Myeloproliferative_LCH_LC", "Prolif_Myeloproliferative_MDS")
for (f in findthese) {
  annot$disease[grepl(f, annot$MAINCLASS)] <- f
}

# subclasses: MAINCLASS terms first, then colorClass terms override them.
annot$subclasses <- rep("other", nrow(annot))
findthese <- c("NonCancer", "Cancer_Myeloma", "Cancer_Lymphoma",
               "CellLine_Leukemia", "CellLine_Lymphoma", "CellLine_Myeloma",
               "Prolif_Lymphoproliferative_ALPS", "Prolif_Lymphoproliferative_MPN",
               "Prolif_Myeloproliferative_LCH_LC", "Prolif_Myeloproliferative_MDS")
findthese2 <- c("T-ALL", "pre-B-ALL", "AML", "CML", "CLL", "BCL", "TCL",
                "B-Lymphoid", "T-Lymphoid", "Lymphoid", "Myeloid",
                "Erythroid", "StemCell")
for (f in findthese) {
  annot$subclasses[grepl(f, annot$MAINCLASS)] <- f
}
for (f in findthese2) {
  annot$subclasses[grepl(f, annot$colorClass)] <- f
}

# All DLBCL variants share one subclass label.
DLBCL <- c("Cancer_Lymphoma_BCL_DLBCL_ABC", "Cancer_Lymphoma_BCL_DLBCL_GCB",
           "Cancer_Lymphoma_BCL_DLBCL_na")
annot$subclasses[annot$CLASS2 %in% DLBCL] <- "BCL_DLBCL"

annot$CLASS <- gsub("_na|_check|_testicular", "", annot$CLASS)

# lymphoma annotations: B-/T-cell lymphoma samples that are neither cell
# lines nor NonCancer. The masks are taken before "Cancer_" is stripped.
not_line_or_normal <- !grepl("CellLine", annot$CLASS) & !grepl("NonCancer", annot$CLASS)
bLY <- grepl("Lymphoma_BCL", annot$CLASS) & not_line_or_normal
tLY <- grepl("Lymphoma_TCL", annot$CLASS) & not_line_or_normal
annot$CLASS <- gsub("Cancer_", "", annot$CLASS)

annot$tbLY <- NA
annot$tbLY[bLY | tLY] <- annot$CLASS[bLY | tLY]
409
410
# these are the terms to look for
411
# table(annot$disease)
412
# table(annot$colorClass)
413
# table(annot$acute)
414
# table(annot$subclasses)
415
# table(annot$tbLY)
416
417
#*******************************************************************************************
418
#*************************** annotation clusters ********************************************
419
420
#****************************** make clusters ******************************************
421
422
# make immunological normal annotation vectors
# plotNormals: "Other" by default, "" for everything that is not
# NonCancerHealthy, then the first-to-last rules below (regex on CLASS ->
# label); a later rule overrides an earlier one.
annot$plotNormals <- "Other"
annot$plotNormals[!annot$Sample.type %in% "NonCancerHealthy"] <- ""

plot_rules <- c(
  "RestingBcell|NaiveBcell|MemoryBcell|BcellActivated" = "B cell",
  "GerminalCentre" = "Germinal centre cell",
  "PlasmaBcell" = "Plasma cell",
  "Tcell|NaturalKillerCell" = "T/NK cell",
  "DendriticCell" = "Dendritic cell",
  "Langerhans" = "Langerhans cell",
  "Eryth|Platelet" = "Erythroid",
  "Monocyte" = "Monocyte",
  "Macrophage" = "Macrophage",
  "Neutrophil" = "Neutrophil",
  "MyeloidProgenitor" = "Myeloid progenitor",
  "HematopoieticStemCell" = "HSC",
  "^Mononuclear" = "PBMC",
  "LymphNode" = "Lymph node"
)
for (pattern in names(plot_rules)) {
  annot$plotNormals[grepl(pattern, annot$CLASS)] <- plot_rules[[pattern]]
}

# Label sets; not used within this section.
HLAplot_normals <- c("B cell", "Plasma cell", "T/NK cell", "Dendritic cell",
                     "Erythroid", "Monocyte", "Macrophage", "Neutrophil",
                     "Myeloid progenitor", "HSC")
cytolyticplot_normals <- c("PBMC", "Lymph node")
costimplot_normals <- c(HLAplot_normals, "PBMC", "Lymph node",
                        "Langerhans cell", "Germinal centre cell")

# immunoNormals: starts as the lineage category and is relabelled by regex on
# that same (unchanged) category column. The first rule set runs, then all
# non-NonCancerHealthy samples are blanked, then the second rule set runs on
# every sample again -- this order is kept exactly as in the original.
lineage <- annot$Category.specifying.lineage.tumor.origin
annot$immunoNormals <- lineage

immuno_rules_before_blanking <- c(
  "CD8|CD8+TcellActivated" = "CD8+Tcell",
  "NaturalKiller" = "NKCell",
  "M2-Macrophage" = "M2-Macrophage",
  "M1-Macrophage" = "M1-Macrophage",
  "DendriticCell" = "DendriticCell",
  "Monocyte" = "Monocyte",
  "CD4+" = "CD4+Tcell"
)
for (pattern in names(immuno_rules_before_blanking)) {
  annot$immunoNormals[grepl(pattern, lineage)] <- immuno_rules_before_blanking[[pattern]]
}

annot$immunoNormals[!annot$Sample.type %in% "NonCancerHealthy"] <- ""

immuno_rules_after_blanking <- c(
  "Eryth|Platelet" = "Erythroid",
  "CD3" = "Tcell",
  "GerminalCentre" = "GerminalCentreCell",
  "^Tcell$|^TcellActivated$|^TcellResting$" = "Tcell",
  "^ActivatedBcell$|^RestingBcell$|^BcellActivated$" = "Bcell"
)
for (pattern in names(immuno_rules_after_blanking)) {
  annot$immunoNormals[grepl(pattern, lineage)] <- immuno_rules_after_blanking[[pattern]]
}
514
515
# annotated clusters: one call of the sourced FUN_MAKE_ALL helper per
# annotation column (tbLY was previously computed twice with identical
# arguments; the duplicate call is removed).
tbLY <- FUN_MAKE_ALL(annot$tbLY, "annotated_class", annot$tbLY, 0)
subclasses <- FUN_MAKE_ALL(annot$subclasses, "annotated_class", annot$subclasses, 0)
acute_chronic <- FUN_MAKE_ALL(annot$acute, "annotated_class", annot$acute, 0)
colorClass <- FUN_MAKE_ALL(annot$colorClass, "annotated_class", annot$colorClass, 0)
disease <- FUN_MAKE_ALL(annot$disease, "annotated_class", annot$disease, 0)
fullclass <- FUN_MAKE_ALL(annot$CLASS2, "annotated_class", annot$CLASS2, 0)
immunoclass <- FUN_MAKE_ALL(annot$immunoNormals, "annotated_class_immunoNormals", annot$immunoNormals, 0)

# Stack, drop rows whose name already occurred, and name columns by sample.
l.comparisons <- list(disease, colorClass, acute_chronic, subclasses, tbLY, fullclass, immunoclass)
comparisons <- do.call(rbind, l.comparisons)
comparisons <- data.frame(
  data.matrix(comparisons[!duplicated(rownames(comparisons)), ]),
  stringsAsFactors = FALSE
)
colnames(comparisons) <- colnames(gexp)

# test if all rows are fine, should be >1 values
# The unique values are collected row by row into a list. apply(x, 1, unique)
# is avoided because it returns a matrix whenever all rows have the same
# number of unique values, and the per-row test below would then see single
# values and fail for perfectly valid comparisons.
comparison_values <- as.matrix(comparisons)
A <- lapply(seq_len(nrow(comparison_values)), function(i) unique(comparison_values[i, ]))
names(A) <- rownames(comparisons)

# Every comparison row must contain both a 0 and a 1.
B <- vapply(A, function(d) sum(d %in% c(1, 0)) >= 2, logical(1))

if (!all(B)) stop("Check comparisons, impossible comparisons made")

# categorical feats (sourced FUN_MAKE_CATEGORICAL helper)
class1 <- FUN_MAKE_CATEGORICAL(annot$tbLY, "annotated_class_BCL_TCL")
class2 <- FUN_MAKE_CATEGORICAL(annot$colorClass, "annotated_class_colorclass")
class3 <- FUN_MAKE_CATEGORICAL(annot$disease, "annotated_class_disease")
class4 <- FUN_MAKE_CATEGORICAL(annot$immunoNormals, "annotated_class_immunoNormals")

l.comparisons <- list(class1, class2, class3, class4)
comparisons_cat <- do.call(rbind, l.comparisons)
comparisons_cat <- data.frame(
  comparisons_cat[!duplicated(rownames(comparisons_cat)), ],
  stringsAsFactors = FALSE
)

colnames(comparisons_cat) <- colnames(gexp)
548
549
550
#****************************************************************************************************
551
#******************** Next we start to create features of these individual data types ***************
552
#****************************************************************************************************
553
554
# Stack the feature blocks by column position; row names are restored
# afterwards because rbindlist does not keep them.
l.fm <- list(data_list, clin, fractions, comparisons)

library(data.table)
fm <- data.frame(
  rbindlist(l.fm, use.names = FALSE, fill = FALSE),
  stringsAsFactors = FALSE
)
rownames(fm) <- unlist(lapply(l.fm, rownames))

# The feature matrix is saved under the object name `matrix` further down.
matrix <- fm

# also add clinicaldata to annotations: rows tagged "C:" stay character,
# all other clinical rows are converted with data.matrix; both are transposed
# so that samples are rows.
is_categorical <- grepl("^C:", rownames(clin))
numclin <- t(data.matrix(clin[!is_categorical, ]))
numchr <- t(clin[is_categorical, ])
colnames(numclin) <- gsub(".:CLIN:|.:GEXP:|", "", colnames(numclin))
colnames(numchr) <- gsub(".:CLIN:|.:GEXP:|", "", colnames(numchr))

annot_add <- data.frame(numclin, numchr, stringsAsFactors = FALSE)

fractions2 <- t(fractions)
colnames(fractions2) <- gsub("N:SAMP:", "", colnames(fractions2))

# Extended annotation table: original annotations, clinical columns, the three
# numeric scores, their categorical calls, and the cell fractions.
annot2 <- data.frame(
  annot,
  annot_add,
  "CytolyticScore" = as.numeric(gm1),
  "HLAIScore" = as.numeric(gm2),
  "HLAIIScore" = as.numeric(gm3),
  classification,
  fractions2,
  stringsAsFactors = FALSE
)
575
576
577
# AML map clusters for samples present in the annotation table.
clusters <- read.delim("AML_15pct_BHSNE_mean-shift.txt", stringsAsFactors = FALSE, header = TRUE)
clusters <- clusters[clusters$ID %in% annot$GSM.identifier..sample., ]

matrix_sub <- matrix[, colnames(matrix) %in% clusters$ID]
annot_sub <- annot[annot$GSM.identifier..sample. %in% clusters$ID, ]

# Put the cluster rows in the same order as annot_sub. The cluster labels are
# used below to index vectors that follow annot_sub's row order; without this
# step a differently ordered cluster file would assign samples to the wrong
# TCGA cluster.
clusters <- clusters[match(annot_sub$GSM.identifier..sample., clusters$ID), ]
clusters_cancermap <- clusters$X1.5..cluster

# TCGA clusters: rows of the mapping table (second column) that belong to each
# TCGA AML cluster, listed in cluster order 1..7.
cluster_mapping <- read.delim("Table_TCGA_cluster_AML_cluster_assignment.txt",
                              header = TRUE, stringsAsFactors = FALSE, sep = "\t")
tcga_mapping_rows <- list(1, 2, 3:5, 6:10, 11, 12:15, 16:17)

TCGA_cluster <- rep("NA", nrow(annot_sub))
for (k in seq_along(tcga_mapping_rows)) {
  in_cluster <- clusters_cancermap %in% cluster_mapping[tcga_mapping_rows[[k]], 2]
  TCGA_cluster[in_cluster] <- paste0("TCGA_AML_cluster_", k)
}

# Sample identifiers per TCGA cluster; unassigned samples ("NA") are dropped.
tcga_labels <- unique(TCGA_cluster)
n <- lapply(tcga_labels, function(i) annot_sub$GSM.identifier..sample.[TCGA_cluster %in% i])
names(n) <- tcga_labels
n <- n[!tcga_labels %in% "NA"]
save(n, file = "Hemap_immunology_TCGA_clusters.Rdata")

# Persist the feature matrix and the extended annotations.
save(matrix, file = "Hemap_immunology_fm.Rdata")
save(annot2, file = "Hemap_immunology_Annotations.Rdata")
write.table(annot2, "Hemap_immunology_Annotations.tsv", sep = "\t",
            col.names = TRUE, row.names = FALSE, quote = FALSE)

# TSV feature matrix: a header line starting with "N:SAMP", then the rows.
write.table(t(c("N:SAMP", as.character(colnames(matrix)))),
            file = "Hemap_immunology_fm.tsv", sep = "\t",
            col.names = FALSE, row.names = FALSE, quote = FALSE, append = FALSE)
write.table(matrix, file = "Hemap_immunology_fm.tsv", sep = "\t",
            col.names = FALSE, row.names = TRUE, quote = FALSE, append = TRUE)
608
609
# make a small fix here to harmonize survival data to months:
# Only the .Rdata annotation file is rewritten at the end of this step.
load("Hemap_immunology_Annotations.Rdata")

# Interactive check: columns 2 and 4 of the samples that have a survival time.
unique(cbind(annot2[!is.na(annot2$OS_Time), c(2, 4)]))

# Survival times of these data sets are multiplied by 12.
needs_months <- !is.na(annot2$OS_Time) &
  annot2[, 2] %in% c("GSE10846,GSE11318", "GSE10846", "GSE10846,GSE17372", "GSE11877")
annot2$OS_Time[needs_months] <- annot2$OS_Time[needs_months] * 12

# for myeloma, transform data to 5y survival to compare data sets:
modify <- !is.na(annot2$OS_Time) & annot2[, 2] %in% c("GSE16716,GSE24080")
find <- annot2$OS_Time > 60 & modify

# change status to alive if dead later: an event after 60 is reset to status 0,
# and the time itself is capped at 60.
find2 <- annot2$OS_Status == 1 & modify
annot2$OS_Status[find & find2] <- 0
annot2$OS_Time[find] <- 60

save(annot2, file = "Hemap_immunology_Annotations.Rdata")
625
626
#****************************************************************************************
627
# This FM can then be used as a backbone for other FMs. GSVA and clusters must be added
628
#****************************************************************************************
629
630
# Reload the saved backbone feature matrix and annotations.
matrix <- get(load("Hemap_immunology_fm.Rdata"))
annot <- get(load("Hemap_immunology_Annotations.Rdata"))

#********************************** Full map **********************************
# NOTE(review): this is the file loaded as the annotation table at the top of
# the script; confirm it really has an `ID` column, otherwise the filter below
# keeps no rows.
clusters <- read.delim("anno_coord_data9544_15pct_bw2.5_updated.txt",
                       header = TRUE, stringsAsFactors = FALSE)
clusters <- clusters[clusters$ID %in% annot$GSM.identifier..sample., ]

#*********************************** GSVA input ****************************
gsva <- get(load("data8238_dufva_immunological_genes_updated_2016_GSVA_geneperm_lean_eFDR.Rdata"))
bindea <- get(load("data8238_all_samples_dufva_bindea_2013_geneset_GSVA.Rdata"))
# This load() is expected to bring `gsva_es` into the workspace (it is used on
# the next line without any other definition).
load("data8238_all_samples_Combined_pathway_signatures_210616_GSVA.Rdata")
gsva_es <- rbind(gsva, bindea, gsva_es)
gsva_es <- gsva_es[!duplicated(rownames(gsva_es)), ]

# match cols gsva: put columns in feature-matrix order and reuse its names.
gsva_es <- data.frame(gsva_es[, match(colnames(matrix), colnames(gsva_es))])
colnames(gsva_es) <- colnames(matrix)

#**************************** Cancermap clusters ****************************
clusters_cancermap <- clusters$X2.5..cluster
cluster_cancermap <- FUN_MAKE_ALL(clusters_cancermap, "cancermap_cluster", clusters_cancermap, 0)
subclasses <- FUN_MAKE_ALL(annot$subclasses, "cancermap_cluster", clusters_cancermap, 0.8)
acute_chronic <- FUN_MAKE_ALL(annot$acute, "cancermap_cluster", clusters_cancermap, 0.8)
colorClass <- FUN_MAKE_ALL(annot$colorClass, "cancermap_cluster", clusters_cancermap, 0.8)
# NOTE(review): unlike the three calls above, the next two pass the annotation
# itself instead of clusters_cancermap as third argument -- confirm intended.
disease <- FUN_MAKE_ALL(annot$disease, "cancermap_cluster", annot$disease, 0.8)
fullclass <- FUN_MAKE_ALL(annot$CLASS2, "cancermap_cluster", annot$CLASS2, 0.8)

class_cancermap <- FUN_MAKE_CATEGORICAL(clusters_cancermap, "cancermap_cluster")

# Stack the comparison rows and keep the first occurrence of each row name.
l.comparisons <- list(cluster_cancermap, subclasses, acute_chronic, colorClass,
                      disease, fullclass, class_cancermap)
comparisons_cat <- do.call(rbind, l.comparisons)
comparisons_cat <- data.frame(
  comparisons_cat[!duplicated(rownames(comparisons_cat)), ],
  stringsAsFactors = FALSE
)
colnames(comparisons_cat) <- colnames(matrix)

# combine: bind by column position, then restore the row names.
l.fm <- list(matrix, gsva_es, comparisons_cat)

library(data.table)
fm <- data.frame(
  rbindlist(l.fm, use.names = FALSE, fill = FALSE),
  stringsAsFactors = FALSE
)
rownames(fm) <- unlist(lapply(l.fm, rownames))

# Drop feature rows that consist of NA only.
rm <- apply(fm, 1, function(v) all(is.na(v)))
fm <- fm[!rm, ]

save(fm, file = "Hemap_immunology_fm_cancermap.Rdata")

# TSV output: a header line starting with "N:SAMP", then the feature rows.
write.table(t(c("N:SAMP", as.character(colnames(fm)))),
            file = "Hemap_immunology_fm_cancermap.tsv", sep = "\t",
            col.names = FALSE, row.names = FALSE, quote = FALSE, append = FALSE)
write.table(fm, file = "Hemap_immunology_fm_cancermap.tsv", sep = "\t",
            col.names = FALSE, row.names = TRUE, quote = FALSE, append = TRUE)
681
682
683
#********************************** Lymphoma **********************************
# Builds the lymphoma feature matrix: the base feature matrix restricted to
# the samples listed in the lymphoma cluster file, plus GSVA scores and
# cluster/annotation comparison rows. The result is saved as .Rdata and .tsv.
#
# FUN_MAKE_ALL and FUN_MAKE_CATEGORICAL are not defined in this section;
# presumably they come from the functions_generate_fm.R script sourced at the
# top of the file.

matrix <- get(load("Hemap_immunology_fm.Rdata"))
annot <- get(load("Hemap_immunology_Annotations.Rdata"))

# annot
clusters <- read.delim(
  "Hemap_Lymphoma_15pct_genes_BHSNE_mean-shift.txt",
  stringsAsFactors = FALSE, header = TRUE
)
clusters <- clusters[clusters$ID %in% annot$GSM.identifier..sample., ]

# Restrict the matrix and the annotations to the samples in the cluster file.
# NOTE(review): `matrix_sub` keeps the column order of `matrix`, `annot_sub`
# the row order of `annot`, and `clusters` the order of its file; they are
# paired by position below -- confirm the three orders agree.
matrix_sub <- matrix[, colnames(matrix) %in% clusters$ID]
annot_sub <- annot[annot$GSM.identifier..sample. %in% clusters$ID, ]

# Loaded for its side effect only: this file presumably defines `gsva_es`,
# which is read on the next line -- verify the object name inside the file.
load("data9544_LYMPHOMA_all_samples_Combined_pathway_drug_signatures_2017_GSVA.Rdata")
rownames(gsva_es) <- gsub(" ", "_", rownames(gsva_es))

# Reorder the GSVA columns to follow the columns of `matrix_sub`.
gsva_es <- data.frame(gsva_es[, match(colnames(matrix_sub), colnames(gsva_es))])
colnames(gsva_es) <- colnames(matrix_sub)

clusters_cancermap <- clusters$X1.5..cluster

# comparisons
cluster_subtypes <- FUN_MAKE_ALL(annot_sub$CLASS, "cancermap_cluster", clusters_cancermap, 0.8)
cluster_cancermap <- FUN_MAKE_ALL(clusters_cancermap, "cancermap_cluster", clusters_cancermap, 0)
cluster_BCL_TCL <- FUN_MAKE_ALL(annot_sub$tbLY, "cancermap_cluster", clusters_cancermap, 0.8)
class_cancermap <- FUN_MAKE_CATEGORICAL(clusters_cancermap, "cancermap_cluster")

# Stack all comparison rows; on duplicated row names keep the first one.
l.comparisons <- list(cluster_subtypes, cluster_cancermap, cluster_BCL_TCL, class_cancermap)
comparisons_cat <- do.call(rbind, l.comparisons)
comparisons_cat <- data.frame(
  data.matrix(comparisons_cat[!duplicated(rownames(comparisons_cat)), ]),
  stringsAsFactors = FALSE
)

# combine
l.fm <- list(matrix_sub, data.frame(gsva_es), comparisons_cat)

# Kept so this section can be run on its own in a fresh session.
library(data.table)

# Blocks are bound by column position (use.names = FALSE), so the column
# order of every element of `l.fm` must already agree.
fm <- rbindlist(l.fm, use.names = FALSE, fill = FALSE)
fm <- data.frame(fm, stringsAsFactors = FALSE)

# rbindlist() does not carry row names over; restore them block by block.
rownames(fm) <- unlist(lapply(l.fm, rownames))

# Drop rows in which every value is NA.
rm <- apply(fm, 1, function(v) all(is.na(v)))
fm <- fm[!rm, ]

save(fm, file = "Hemap_LYMPHOMA_immunology_fm.Rdata")

# The .tsv is written in two steps: a header line starting with "N:SAMP"
# followed by the column names, then the data rows appended with row names.
write.table(
  t(c("N:SAMP", as.character(colnames(fm)))),
  file = "Hemap_LYMPHOMA_immunology_fm.tsv", sep = "\t",
  col.names = FALSE, row.names = FALSE, quote = FALSE, append = FALSE
)
write.table(
  fm,
  file = "Hemap_LYMPHOMA_immunology_fm.tsv", sep = "\t",
  col.names = FALSE, row.names = TRUE, quote = FALSE, append = TRUE
)
731
732
#********************************** AML **********************************
# Builds the AML feature matrix: the base feature matrix restricted to the
# samples listed in the AML cluster file, plus GSVA scores and comparison rows
# for the map clusters and the TCGA cluster labels derived from them. The
# result is saved as .Rdata and .tsv.
#
# FUN_MAKE_ALL and FUN_MAKE_CATEGORICAL are not defined in this section;
# presumably they come from the functions_generate_fm.R script sourced at the
# top of the file.

matrix <- get(load("Hemap_immunology_fm.Rdata"))
annot <- get(load("Hemap_immunology_Annotations.Rdata"))

# annot
clusters <- read.delim(
  "AML_15pct_BHSNE_mean-shift.txt",
  stringsAsFactors = FALSE, header = TRUE
)
clusters <- clusters[clusters$ID %in% annot$GSM.identifier..sample., ]

clusters_cancermap <- clusters$X1.5..cluster

# Restrict the matrix and the annotations to the samples in the cluster file.
# NOTE(review): `matrix_sub` keeps the column order of `matrix`, `annot_sub`
# the row order of `annot`, and `clusters` the order of its file; they are
# paired by position below -- confirm the three orders agree.
matrix_sub <- matrix[, colnames(matrix) %in% clusters$ID]
annot_sub <- annot[annot$GSM.identifier..sample. %in% clusters$ID, ]

# GSVA input
# Bind the loaded object to `gsva_es` explicitly. Previously it was assigned
# only to `gsva`, and the column matching below read `gsva_es`, which was
# correct only if the .Rdata file happened to contain an object with exactly
# that name; otherwise a stale `gsva_es` from an earlier section was used.
gsva_es <- get(load("data9544_AML_all_samples_Combined_pathway_drug_signatures_2017_GSVA.Rdata"))
gsva <- gsva_es # kept in case later code still reads `gsva`
# NOTE(review): the Lymphoma section replaces spaces in the GSVA row names
# with underscores; this section does not -- confirm that is intended.

# Reorder the GSVA columns to follow the columns of `matrix_sub`.
gsva_es <- data.frame(gsva_es[, match(colnames(matrix_sub), colnames(gsva_es))])
colnames(gsva_es) <- colnames(matrix_sub)

# TCGA clusters
cluster_mapping <- read.delim(
  "Table_TCGA_cluster_AML_cluster_assignment.txt",
  header = TRUE, stringsAsFactors = FALSE, sep = "\t"
)

# Default label is the literal string "NA" (not a missing value).
annot_sub$TCGA_cluster <- rep("NA", nrow(annot_sub))

# Rows of `cluster_mapping` whose second column lists the map clusters that
# belong to each TCGA cluster. Applied in this order, so a later entry
# overrides an earlier one if a map cluster appears in both.
tcga_rows <- list(
  TCGA_AML_cluster_1 = 1,
  TCGA_AML_cluster_2 = 2,
  TCGA_AML_cluster_3 = 3:5,
  TCGA_AML_cluster_4 = 6:10,
  TCGA_AML_cluster_5 = 11,
  TCGA_AML_cluster_6 = 12:15,
  TCGA_AML_cluster_7 = 16:17
)
for (tcga_label in names(tcga_rows)) {
  in_group <- clusters_cancermap %in% cluster_mapping[tcga_rows[[tcga_label]], 2]
  annot_sub$TCGA_cluster[in_group] <- tcga_label
}

# comparisons
cluster_TCGA <- FUN_MAKE_ALL(annot_sub$TCGA_cluster, "cancermap_cluster", annot_sub$TCGA_cluster, 0.9)
cluster_cancermap <- FUN_MAKE_ALL(clusters_cancermap, "cancermap_cluster", clusters_cancermap, 0)
cluster_subtypes <- FUN_MAKE_ALL(annot_sub$CLASS, "cancermap_cluster", clusters_cancermap, 0.9)
class_cancermap <- FUN_MAKE_CATEGORICAL(clusters_cancermap, "cancermap_cluster")
class_TCGA <- FUN_MAKE_CATEGORICAL(annot_sub$TCGA_cluster, "cancermap_cluster")

# Stack all comparison rows; on duplicated row names keep the first one.
l.comparisons <- list(
  cluster_TCGA, cluster_cancermap, cluster_subtypes,
  class_cancermap, class_TCGA
)
comparisons_cat <- do.call(rbind, l.comparisons)
comparisons_cat <- data.frame(
  data.matrix(comparisons_cat[!duplicated(rownames(comparisons_cat)), ]),
  stringsAsFactors = FALSE
)

# combine
l.fm <- list(matrix_sub, data.frame(gsva_es), comparisons_cat)

# Kept so this section can be run on its own in a fresh session.
library(data.table)

# Blocks are bound by column position (use.names = FALSE), so the column
# order of every element of `l.fm` must already agree.
fm <- rbindlist(l.fm, use.names = FALSE, fill = FALSE)
fm <- data.frame(fm, stringsAsFactors = FALSE)

# rbindlist() does not carry row names over; restore them block by block.
rownames(fm) <- unlist(lapply(l.fm, rownames))

# Drop rows in which every value is NA.
rm <- apply(fm, 1, function(v) all(is.na(v)))
fm <- fm[!rm, ]

save(fm, file = "Hemap_AML_immunology_fm.Rdata")

# The .tsv is written in two steps: a header line starting with "N:SAMP"
# followed by the column names, then the data rows appended with row names.
write.table(
  t(c("N:SAMP", as.character(colnames(fm)))),
  file = "Hemap_AML_immunology_fm.tsv", sep = "\t",
  col.names = FALSE, row.names = FALSE, quote = FALSE, append = FALSE
)
write.table(
  fm,
  file = "Hemap_AML_immunology_fm.tsv", sep = "\t",
  col.names = FALSE, row.names = TRUE, quote = FALSE, append = TRUE
)
794
795
# write.table(all_isolation, file="Hemap_all_isolation.tsv", sep="\t", col.names=F, row.names=F, quote=FALSE, append=F)