Switch to unified view

a b/FigS2AB_microenvironment_analysis_GSEA_GSVA.R
1
GIT_HOME="/research/users/ppolonen/git_home/ImmunogenomicLandscape-BloodCancers/"
2
source(file.path(GIT_HOME, "common_scripts/featurematrix/functions_generate_fm.R"))
3
source(file.path(GIT_HOME, "common_scripts/visualisation/plotting_functions.R"))
4
source(file.path(GIT_HOME, "common_scripts/pathway_analysis/functions.GSEA.R"))
5
6
library(GSVA)
7
library(parallel)
8
9
# FigureS2 A-B related analysis
10
setwd("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/Published_data_figures")
11
12
# annotations
13
annot=get(load("Hemap_immunology_Annotations.Rdata"))
14
15
load("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/data/data9544_with_gene_symbols.RData")
16
data = t(data[rownames(data)%in%annot$GSM.identifier..sample.,])
17
18
subclass2=get.logical(annovector = list(annot$subclasses), filterv = annot$CELLS_SORTED==0&annot$Sample.type%in%c("Prolif", "Cancer"))
19
tbly2=get.logical(annovector = list(annot$tbLY), filterv = annot$CELLS_SORTED==0&annot$Sample.type%in%c("Prolif", "Cancer"))
20
tbly2=tbly2[!names(tbly2)%in%"Lymphoma_BCL"]
21
22
list_cancers=lapply(c(subclass2, tbly2), list)
23
list_cancers=unlist(list_cancers, recursive=F)
24
25
l=c("AML", "BCL_DLBCL", "CLL", "CML", "Lymphoma_BCL_CHL", "Lymphoma_BCL_MCL", "Lymphoma_BCL_FL", "pre-B-ALL", "Prolif_Myeloproliferative_MDS", "T-ALL")
26
logicalvectors=list_cancers[names(list_cancers)%in%l]
27
28
#*********************** run GSEA using cytolyticScore as ranking metric on each of the disease: ***********************
29
30
GENESETS=file.path(getwd(), "HALLMARKS.gmt")
31
WD=file.path(getwd(), "GSEA")
32
OUTDIR=file.path(getwd(), "GSEA")
33
34
names(logicalvectors)=gsub("-", "_", names(logicalvectors))
35
36
res=parallel::mclapply(seq(logicalvectors), function(i){
37
  filt=logicalvectors[[i]]
38
  command=run.GSEA(data=data[,filt], cls.vector = as.numeric(annot$CytolyticScore[filt]), datatype = "N", GENESETS = GENESETS, dataname = paste0(names(logicalvectors)[i]), clsname = "CytolyticScore", WD=WD, OUTDIR=OUTDIR)
39
  try(system(command))
40
}, mc.cores=5)
41
42
#***********************************************************************************************************************
43
44
#************** run GSEA using cytolyticScore as ranking metric on TNK cells vs other normal ******************
45
cls=annot$immunoNormals%in%c("CD8+Tcell", "NKCell")
46
filt=!annot$immunoNormals%in%c("")
47
48
command=run.GSEA(data=data[,filt], cls.vector = as.numeric(cls[filt]), datatype = "B", GENESETS = GENESETS, dataname = "CD8_NK_pathways", clsname = "CD8_NK", WD=WD, OUTDIR=OUTDIR)
49
try(system(command))
50
51
#***********************************************************************************************************************
52
53
# combine results to same matrix:
54
datp=list.files(path = OUTDIR, pattern = "gsea_report_for_1_*.*.xls|gsea_report_for_feat_pos.*..xls", recursive = T, full.names = T)
55
datn=list.files(path = OUTDIR, pattern = "gsea_report_for_0_*.*.xls|gsea_report_for_feat_neg.*..xls", recursive = T, full.names = T)
56
57
# filter to contain only significant
58
pwnames=scan(pipe("cut -f1 HALLMARKS.gmt"), "genesets")
59
60
FUN=function(f){
61
  
62
  dat=try(read.delim(f, stringsAsFactors=F, header=T), silent = T)
63
  dat$FDR.q.val[dat$FDR.q.val==0]=0.0001
64
  # variable.1, variable.2, features (y name), id (x name)
65
  d=data.frame("features"=gsub("-MSIGDB_HALLMARKS", "", dat$NAME), "variable.1"=dat$NES, "variable.2"=-log10(as.numeric(dat$FDR.q.val)), "id"=gsub("_CytolyticScore_.*.|_pathways_.*.|Prolif_Myeloproliferative_|Lymphoma_BCL_|^/BCL_|/","", gsub(eval(OUTDIR), "", f)))
66
  
67
}
68
69
pwp=mclapply(datp, FUN, mc.cores=10)
70
pwp2=mclapply(datn, FUN, mc.cores=10)
71
72
dat=do.call(rbind, pwp)
73
dat2=do.call(rbind, pwp2)
74
75
dat_comb=rbind(dat, dat2)
76
77
dat_comb$id=factor(gsub("_", "-", dat_comb$id), levels=c("DLBCL","CHL", "FL", "MCL", "CLL", "CML", "AML", "MDS", "pre-B-ALL", "T-ALL", "CD8-NK"))
78
79
pdf("FigureS2A.pdf", width = 5.5, height = 6.5)
80
plot.DotPlot.df(data.plot = dat_comb, cols = c("blue", "white", "red"), fontsize = 8, name.variable.1 = "NES", name.variable.2 = "-log10 FDR", dot.scale = 3)
81
dev.off()
82
83
# plot correlation plot between protein and Pathway
84
fm=get(load("TCGA_DLBCL_FM_DUFVA.Rdata"))
85
86
# run GSVA for certain pathways and add to matrix:
87
gexp=data.matrix(fm[grepl("N:GEXP", rownames(fm)),])
88
rownames(gexp)=gsub(":.*.", "", gsub("N:GEXP:", "",rownames(gexp)))
89
90
# gene sets:
91
GENESETS="Combined_pathway_signatures_2017_filtered_robust.gmt"
92
93
# get GSVA visualization for the pathways
94
# Geneset list
95
Onc.pathways=read.delim(GENESETS, stringsAsFactors = FALSE, header=F, col.names = paste("V",1:max(count.fields(GENESETS, sep = '\t'), na.rm = T)), fill = TRUE)
96
97
# Make list
98
listA=mclapply(1:length(Onc.pathways[,1]), function(i){A=as.character(Onc.pathways[i,3:length(Onc.pathways),])
99
B=A[!A==""&!A=="NA"]}, mc.cores=6)
100
names(listA) <- toupper(Onc.pathways[,1])
101
viz_scores=gsva(expr = gexp, gset.idx.list = listA, parallel.sz=8, tau=0.25)
102
rownames(viz_scores)=paste0("N:GSVA:", rownames(viz_scores))
103
fm.add=rbind(fm, viz_scores)
104
105
fm.add=fm.add[!(!grepl("^B:GNAB:*.*chr*.*y_n_somatic", rownames(fm.add))&grepl("^B:GNAB:", rownames(fm.add))),!is.na(fm.add["N:RPPA:PARP1:chr1:226548392:226595780:-:PARP_cleaved",])]
106
107
annotdf=data.frame("CytolyticScore"=as.numeric(fm["N:SAMP:CytolyticScore",]), "CASP7"=as.numeric(fm["N:RPPA:CASP7:chr10:115438942:115490662:+:Caspase-7_cleavedD198",]), "IRF1"=as.numeric(fm["N:RPPA:IRF1:chr5:131817301:131826490:-:IRF-1",]))
108
rownames(annotdf)=colnames(fm)
109
110
annotdf=annotdf[!is.na(annotdf$CASP7),]
111
112
pdf("FigureS2B.pdf", height = 2.5, width = 2.5)
113
ggscatter(annotdf, x = "CytolyticScore", y = "CASP7",
114
          size = 1.5,
115
          add = "reg.line",  # Add regression line
116
          add.params = list(color = "blue", fill = "lightgray"), # Customize reg. line
117
          conf.int = TRUE # Add confidence interval
118
) +
119
  stat_cor(method = "pearson", label.sep = "\n") +
120
  xlab("Cytolytic Score") +
121
  ylab("CASP7")
122
123
ggscatter(annotdf, x = "CytolyticScore", y = "IRF1",
124
          size = 1.5,
125
          add = "reg.line",  # Add regression line
126
          add.params = list(color = "blue", fill = "lightgray"), # Customize reg. line
127
          conf.int = TRUE # Add confidence interval
128
) +
129
  stat_cor(method = "pearson", label.sep = "\n") +
130
  xlab("Cytolytic Score") +
131
  ylab("IRF1")
132
dev.off()