ImmunogenomicLandscape-BC / Git / Diff of /FigS2AB_microenvironment_analysis_GSEA

Models:

WandaB/

ImmunogenomicLandscape-BC

Downloads: 1

Diff of /FigS2AB_microenvironment_analysis_GSEA_GSVA.R [000000] .. [8e0848]

Switch to unified view

 b/FigS2AB_microenvironment_analysis_GSEA_GSVA.R
+GIT_HOME="/research/users/ppolonen/git_home/ImmunogenomicLandscape-BloodCancers/"
+source(file.path(GIT_HOME, "common_scripts/featurematrix/functions_generate_fm.R"))
+source(file.path(GIT_HOME, "common_scripts/visualisation/plotting_functions.R"))
+source(file.path(GIT_HOME, "common_scripts/pathway_analysis/functions.GSEA.R"))
+library(GSVA)
+library(parallel)
+# FigureS2 A-B related analysis
+setwd("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/Published_data_figures")
+# annotations
+annot=get(load("Hemap_immunology_Annotations.Rdata"))
+load("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/data/data9544_with_gene_symbols.RData")
+data = t(data[rownames(data)%in%annot$GSM.identifier..sample.,])
+subclass2=get.logical(annovector = list(annot$subclasses), filterv = annot$CELLS_SORTED==0&annot$Sample.type%in%c("Prolif", "Cancer"))
+tbly2=get.logical(annovector = list(annot$tbLY), filterv = annot$CELLS_SORTED==0&annot$Sample.type%in%c("Prolif", "Cancer"))
+tbly2=tbly2[!names(tbly2)%in%"Lymphoma_BCL"]
+list_cancers=lapply(c(subclass2, tbly2), list)
+list_cancers=unlist(list_cancers, recursive=F)
+l=c("AML", "BCL_DLBCL", "CLL", "CML", "Lymphoma_BCL_CHL", "Lymphoma_BCL_MCL", "Lymphoma_BCL_FL", "pre-B-ALL", "Prolif_Myeloproliferative_MDS", "T-ALL")
+logicalvectors=list_cancers[names(list_cancers)%in%l]
+#*********************** run GSEA using cytolyticScore as ranking metric on each of the disease: ***********************
+GENESETS=file.path(getwd(), "HALLMARKS.gmt")
+WD=file.path(getwd(), "GSEA")
+OUTDIR=file.path(getwd(), "GSEA")
+names(logicalvectors)=gsub("-", "_", names(logicalvectors))
+res=parallel::mclapply(seq(logicalvectors), function(i){
+  filt=logicalvectors[[i]]
+  command=run.GSEA(data=data[,filt], cls.vector = as.numeric(annot$CytolyticScore[filt]), datatype = "N", GENESETS = GENESETS, dataname = paste0(names(logicalvectors)[i]), clsname = "CytolyticScore", WD=WD, OUTDIR=OUTDIR)
+  try(system(command))
+}, mc.cores=5)
+#***********************************************************************************************************************
+#************** run GSEA using cytolyticScore as ranking metric on TNK cells vs other normal ******************
+cls=annot$immunoNormals%in%c("CD8+Tcell", "NKCell")
+filt=!annot$immunoNormals%in%c("")
+command=run.GSEA(data=data[,filt], cls.vector = as.numeric(cls[filt]), datatype = "B", GENESETS = GENESETS, dataname = "CD8_NK_pathways", clsname = "CD8_NK", WD=WD, OUTDIR=OUTDIR)
+try(system(command))
+#***********************************************************************************************************************
+# combine results to same matrix:
+datp=list.files(path = OUTDIR, pattern = "gsea_report_for_1_*.*.xls|gsea_report_for_feat_pos.*..xls", recursive = T, full.names = T)
+datn=list.files(path = OUTDIR, pattern = "gsea_report_for_0_*.*.xls|gsea_report_for_feat_neg.*..xls", recursive = T, full.names = T)
+# filter to contain only significant
+pwnames=scan(pipe("cut -f1 HALLMARKS.gmt"), "genesets")
+FUN=function(f){
+  dat=try(read.delim(f, stringsAsFactors=F, header=T), silent = T)
+  dat$FDR.q.val[dat$FDR.q.val==0]=0.0001
+  # variable.1, variable.2, features (y name), id (x name)
+  d=data.frame("features"=gsub("-MSIGDB_HALLMARKS", "", dat$NAME), "variable.1"=dat$NES, "variable.2"=-log10(as.numeric(dat$FDR.q.val)), "id"=gsub("_CytolyticScore_.*.|_pathways_.*.|Prolif_Myeloproliferative_|Lymphoma_BCL_|^/BCL_|/","", gsub(eval(OUTDIR), "", f)))
+}
+pwp=mclapply(datp, FUN, mc.cores=10)
+pwp2=mclapply(datn, FUN, mc.cores=10)
+dat=do.call(rbind, pwp)
+dat2=do.call(rbind, pwp2)
+dat_comb=rbind(dat, dat2)
+dat_comb$id=factor(gsub("_", "-", dat_comb$id), levels=c("DLBCL","CHL", "FL", "MCL", "CLL", "CML", "AML", "MDS", "pre-B-ALL", "T-ALL", "CD8-NK"))
+pdf("FigureS2A.pdf", width = 5.5, height = 6.5)
+plot.DotPlot.df(data.plot = dat_comb, cols = c("blue", "white", "red"), fontsize = 8, name.variable.1 = "NES", name.variable.2 = "-log10 FDR", dot.scale = 3)
+dev.off()
+# plot correlation plot between protein and Pathway
+fm=get(load("TCGA_DLBCL_FM_DUFVA.Rdata"))
+# run GSVA for certain pathways and add to matrix:
+gexp=data.matrix(fm[grepl("N:GEXP", rownames(fm)),])
+rownames(gexp)=gsub(":.*.", "", gsub("N:GEXP:", "",rownames(gexp)))
+# gene sets:
+GENESETS="Combined_pathway_signatures_2017_filtered_robust.gmt"
+# get GSVA visualization for the pathways
+# Geneset list
+Onc.pathways=read.delim(GENESETS, stringsAsFactors = FALSE, header=F, col.names = paste("V",1:max(count.fields(GENESETS, sep = '\t'), na.rm = T)), fill = TRUE)
+# Make list
+listA=mclapply(1:length(Onc.pathways[,1]), function(i){A=as.character(Onc.pathways[i,3:length(Onc.pathways),])
+B=A[!A==""&!A=="NA"]}, mc.cores=6)
+names(listA) <- toupper(Onc.pathways[,1])
+viz_scores=gsva(expr = gexp, gset.idx.list = listA, parallel.sz=8, tau=0.25)
+rownames(viz_scores)=paste0("N:GSVA:", rownames(viz_scores))
+fm.add=rbind(fm, viz_scores)
+fm.add=fm.add[!(!grepl("^B:GNAB:*.*chr*.*y_n_somatic", rownames(fm.add))&grepl("^B:GNAB:", rownames(fm.add))),!is.na(fm.add["N:RPPA:PARP1:chr1:226548392:226595780:-:PARP_cleaved",])]
+annotdf=data.frame("CytolyticScore"=as.numeric(fm["N:SAMP:CytolyticScore",]), "CASP7"=as.numeric(fm["N:RPPA:CASP7:chr10:115438942:115490662:+:Caspase-7_cleavedD198",]), "IRF1"=as.numeric(fm["N:RPPA:IRF1:chr5:131817301:131826490:-:IRF-1",]))
+rownames(annotdf)=colnames(fm)
+annotdf=annotdf[!is.na(annotdf$CASP7),]
+pdf("FigureS2B.pdf", height = 2.5, width = 2.5)
+ggscatter(annotdf, x = "CytolyticScore", y = "CASP7",
+          size = 1.5,
+          add = "reg.line",  # Add regression line
+          add.params = list(color = "blue", fill = "lightgray"), # Customize reg. line
+          conf.int = TRUE # Add confidence interval
+) +
+  stat_cor(method = "pearson", label.sep = "\n") +
+  xlab("Cytolytic Score") +
+  ylab("CASP7")
+ggscatter(annotdf, x = "CytolyticScore", y = "IRF1",
+          size = 1.5,
+          add = "reg.line",  # Add regression line
+          add.params = list(color = "blue", fill = "lightgray"), # Customize reg. line
+          conf.int = TRUE # Add confidence interval
+) +
+  stat_cor(method = "pearson", label.sep = "\n") +
+  xlab("Cytolytic Score") +
+  ylab("IRF1")
+dev.off()