|
a |
|
b/FigS2AB_microenvironment_analysis_GSEA_GSVA.R |
|
|
1 |
GIT_HOME="/research/users/ppolonen/git_home/ImmunogenomicLandscape-BloodCancers/" |
|
|
2 |
source(file.path(GIT_HOME, "common_scripts/featurematrix/functions_generate_fm.R")) |
|
|
3 |
source(file.path(GIT_HOME, "common_scripts/visualisation/plotting_functions.R")) |
|
|
4 |
source(file.path(GIT_HOME, "common_scripts/pathway_analysis/functions.GSEA.R")) |
|
|
5 |
|
|
|
6 |
library(GSVA) |
|
|
7 |
library(parallel) |
|
|
8 |
|
|
|
9 |
# FigureS2 A-B related analysis |
|
|
10 |
setwd("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/Published_data_figures") |
|
|
11 |
|
|
|
12 |
# annotations |
|
|
13 |
annot=get(load("Hemap_immunology_Annotations.Rdata")) |
|
|
14 |
|
|
|
15 |
load("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/data/data9544_with_gene_symbols.RData") |
|
|
16 |
data = t(data[rownames(data)%in%annot$GSM.identifier..sample.,]) |
|
|
17 |
|
|
|
18 |
subclass2=get.logical(annovector = list(annot$subclasses), filterv = annot$CELLS_SORTED==0&annot$Sample.type%in%c("Prolif", "Cancer")) |
|
|
19 |
tbly2=get.logical(annovector = list(annot$tbLY), filterv = annot$CELLS_SORTED==0&annot$Sample.type%in%c("Prolif", "Cancer")) |
|
|
20 |
tbly2=tbly2[!names(tbly2)%in%"Lymphoma_BCL"] |
|
|
21 |
|
|
|
22 |
list_cancers=lapply(c(subclass2, tbly2), list) |
|
|
23 |
list_cancers=unlist(list_cancers, recursive=F) |
|
|
24 |
|
|
|
25 |
l=c("AML", "BCL_DLBCL", "CLL", "CML", "Lymphoma_BCL_CHL", "Lymphoma_BCL_MCL", "Lymphoma_BCL_FL", "pre-B-ALL", "Prolif_Myeloproliferative_MDS", "T-ALL") |
|
|
26 |
logicalvectors=list_cancers[names(list_cancers)%in%l] |
|
|
27 |
|
|
|
28 |
#*********************** run GSEA using cytolyticScore as ranking metric on each of the disease: *********************** |
|
|
29 |
|
|
|
30 |
GENESETS=file.path(getwd(), "HALLMARKS.gmt") |
|
|
31 |
WD=file.path(getwd(), "GSEA") |
|
|
32 |
OUTDIR=file.path(getwd(), "GSEA") |
|
|
33 |
|
|
|
34 |
names(logicalvectors)=gsub("-", "_", names(logicalvectors)) |
|
|
35 |
|
|
|
36 |
res=parallel::mclapply(seq(logicalvectors), function(i){ |
|
|
37 |
filt=logicalvectors[[i]] |
|
|
38 |
command=run.GSEA(data=data[,filt], cls.vector = as.numeric(annot$CytolyticScore[filt]), datatype = "N", GENESETS = GENESETS, dataname = paste0(names(logicalvectors)[i]), clsname = "CytolyticScore", WD=WD, OUTDIR=OUTDIR) |
|
|
39 |
try(system(command)) |
|
|
40 |
}, mc.cores=5) |
|
|
41 |
|
|
|
42 |
#*********************************************************************************************************************** |
|
|
43 |
|
|
|
44 |
#************** run GSEA using cytolyticScore as ranking metric on TNK cells vs other normal ****************** |
|
|
45 |
cls=annot$immunoNormals%in%c("CD8+Tcell", "NKCell") |
|
|
46 |
filt=!annot$immunoNormals%in%c("") |
|
|
47 |
|
|
|
48 |
command=run.GSEA(data=data[,filt], cls.vector = as.numeric(cls[filt]), datatype = "B", GENESETS = GENESETS, dataname = "CD8_NK_pathways", clsname = "CD8_NK", WD=WD, OUTDIR=OUTDIR) |
|
|
49 |
try(system(command)) |
|
|
50 |
|
|
|
51 |
#*********************************************************************************************************************** |
|
|
52 |
|
|
|
53 |
# combine results to same matrix: |
|
|
54 |
datp=list.files(path = OUTDIR, pattern = "gsea_report_for_1_*.*.xls|gsea_report_for_feat_pos.*..xls", recursive = T, full.names = T) |
|
|
55 |
datn=list.files(path = OUTDIR, pattern = "gsea_report_for_0_*.*.xls|gsea_report_for_feat_neg.*..xls", recursive = T, full.names = T) |
|
|
56 |
|
|
|
57 |
# filter to contain only significant |
|
|
58 |
pwnames=scan(pipe("cut -f1 HALLMARKS.gmt"), "genesets") |
|
|
59 |
|
|
|
60 |
FUN=function(f){ |
|
|
61 |
|
|
|
62 |
dat=try(read.delim(f, stringsAsFactors=F, header=T), silent = T) |
|
|
63 |
dat$FDR.q.val[dat$FDR.q.val==0]=0.0001 |
|
|
64 |
# variable.1, variable.2, features (y name), id (x name) |
|
|
65 |
d=data.frame("features"=gsub("-MSIGDB_HALLMARKS", "", dat$NAME), "variable.1"=dat$NES, "variable.2"=-log10(as.numeric(dat$FDR.q.val)), "id"=gsub("_CytolyticScore_.*.|_pathways_.*.|Prolif_Myeloproliferative_|Lymphoma_BCL_|^/BCL_|/","", gsub(eval(OUTDIR), "", f))) |
|
|
66 |
|
|
|
67 |
} |
|
|
68 |
|
|
|
69 |
pwp=mclapply(datp, FUN, mc.cores=10) |
|
|
70 |
pwp2=mclapply(datn, FUN, mc.cores=10) |
|
|
71 |
|
|
|
72 |
dat=do.call(rbind, pwp) |
|
|
73 |
dat2=do.call(rbind, pwp2) |
|
|
74 |
|
|
|
75 |
dat_comb=rbind(dat, dat2) |
|
|
76 |
|
|
|
77 |
dat_comb$id=factor(gsub("_", "-", dat_comb$id), levels=c("DLBCL","CHL", "FL", "MCL", "CLL", "CML", "AML", "MDS", "pre-B-ALL", "T-ALL", "CD8-NK")) |
|
|
78 |
|
|
|
79 |
pdf("FigureS2A.pdf", width = 5.5, height = 6.5) |
|
|
80 |
plot.DotPlot.df(data.plot = dat_comb, cols = c("blue", "white", "red"), fontsize = 8, name.variable.1 = "NES", name.variable.2 = "-log10 FDR", dot.scale = 3) |
|
|
81 |
dev.off() |
|
|
82 |
|
|
|
83 |
# plot correlation plot between protein and Pathway |
|
|
84 |
fm=get(load("TCGA_DLBCL_FM_DUFVA.Rdata")) |
|
|
85 |
|
|
|
86 |
# run GSVA for certain pathways and add to matrix: |
|
|
87 |
gexp=data.matrix(fm[grepl("N:GEXP", rownames(fm)),]) |
|
|
88 |
rownames(gexp)=gsub(":.*.", "", gsub("N:GEXP:", "",rownames(gexp))) |
|
|
89 |
|
|
|
90 |
# gene sets: |
|
|
91 |
GENESETS="Combined_pathway_signatures_2017_filtered_robust.gmt" |
|
|
92 |
|
|
|
93 |
# get GSVA visualization for the pathways |
|
|
94 |
# Geneset list |
|
|
95 |
Onc.pathways=read.delim(GENESETS, stringsAsFactors = FALSE, header=F, col.names = paste("V",1:max(count.fields(GENESETS, sep = '\t'), na.rm = T)), fill = TRUE) |
|
|
96 |
|
|
|
97 |
# Make list |
|
|
98 |
listA=mclapply(1:length(Onc.pathways[,1]), function(i){A=as.character(Onc.pathways[i,3:length(Onc.pathways),]) |
|
|
99 |
B=A[!A==""&!A=="NA"]}, mc.cores=6) |
|
|
100 |
names(listA) <- toupper(Onc.pathways[,1]) |
|
|
101 |
viz_scores=gsva(expr = gexp, gset.idx.list = listA, parallel.sz=8, tau=0.25) |
|
|
102 |
rownames(viz_scores)=paste0("N:GSVA:", rownames(viz_scores)) |
|
|
103 |
fm.add=rbind(fm, viz_scores) |
|
|
104 |
|
|
|
105 |
fm.add=fm.add[!(!grepl("^B:GNAB:*.*chr*.*y_n_somatic", rownames(fm.add))&grepl("^B:GNAB:", rownames(fm.add))),!is.na(fm.add["N:RPPA:PARP1:chr1:226548392:226595780:-:PARP_cleaved",])] |
|
|
106 |
|
|
|
107 |
annotdf=data.frame("CytolyticScore"=as.numeric(fm["N:SAMP:CytolyticScore",]), "CASP7"=as.numeric(fm["N:RPPA:CASP7:chr10:115438942:115490662:+:Caspase-7_cleavedD198",]), "IRF1"=as.numeric(fm["N:RPPA:IRF1:chr5:131817301:131826490:-:IRF-1",])) |
|
|
108 |
rownames(annotdf)=colnames(fm) |
|
|
109 |
|
|
|
110 |
annotdf=annotdf[!is.na(annotdf$CASP7),] |
|
|
111 |
|
|
|
112 |
pdf("FigureS2B.pdf", height = 2.5, width = 2.5) |
|
|
113 |
ggscatter(annotdf, x = "CytolyticScore", y = "CASP7", |
|
|
114 |
size = 1.5, |
|
|
115 |
add = "reg.line", # Add regression line |
|
|
116 |
add.params = list(color = "blue", fill = "lightgray"), # Customize reg. line |
|
|
117 |
conf.int = TRUE # Add confidence interval |
|
|
118 |
) + |
|
|
119 |
stat_cor(method = "pearson", label.sep = "\n") + |
|
|
120 |
xlab("Cytolytic Score") + |
|
|
121 |
ylab("CASP7") |
|
|
122 |
|
|
|
123 |
ggscatter(annotdf, x = "CytolyticScore", y = "IRF1", |
|
|
124 |
size = 1.5, |
|
|
125 |
add = "reg.line", # Add regression line |
|
|
126 |
add.params = list(color = "blue", fill = "lightgray"), # Customize reg. line |
|
|
127 |
conf.int = TRUE # Add confidence interval |
|
|
128 |
) + |
|
|
129 |
stat_cor(method = "pearson", label.sep = "\n") + |
|
|
130 |
xlab("Cytolytic Score") + |
|
|
131 |
ylab("IRF1") |
|
|
132 |
dev.off() |