# FigS6C_Hemap_CGA_dotplots.R
1
GIT_HOME="/research/users/ppolonen/git_home/ImmunogenomicLandscape-BloodCancers/"
2
source(file.path(GIT_HOME, "common_scripts/visualisation/plotting_functions.R"))
3
4
# Plot CGA expression dot plots (Figure S6C)
5
6
library(parallel)
7
library(gridExtra)
8
library(ggplot2)
9
library(ggrepel)
10
library(cowplot)
11
12
# load Hemap gene expression data
13
data = get(load("data9544_with_gene_symbols.RData"))
14
15
# load Hemap annotations
16
annot = get(load("Hemap_immunology_Annotations.Rdata"))
17
18
data=data[annot[,1],]
19
20
# plotting functions
21
FUN_PLOT=function(gene, logicalVectors, namesLV, data=NULL, matrix=NULL, col=NULL, ORDER=F, RANGE=NULL) {
22
  if(is.null(matrix)&is.null(data))stop("No data to plot, check data/matrix")
23
  
24
  GNAME=gsub("N:....:|:::::|DUFVA_", "", gene)
25
  GNAME=gsub("_", " ", GNAME)
26
  namesLV=gsub("Cancer_", " ", namesLV)
27
  
28
  cols <- read.table("colors_hemap_immunology.tsv", header = TRUE, sep = "\t", comment.char = " ")
29
  samples <- gsub("LCH", "MDS", gsub("AITL|PTCLNOS|ALCL", "TCL", gsub("FL|MALT", "BCL", gsub("Healthy", "NonCancerHealthy", gsub("_", "", names(logicalVectors))))))
30
31
  if(is.null(col)){
32
    col=as.character(cols[match(samples, cols$sample),2])
33
  }
34
  
35
  
36
  if(!is.null(matrix)){
37
    gene2=ifelse(grepl("GEXP", gene), gene, paste0("'N:GEXP:", gene, ":::::'"))
38
    D=as.numeric(read.delim(pipe(paste0("grep -Fw ", gene2, " ", matrix)), row.names = 1, header=F))
39
  }
40
  
41
  if(!is.null(data)){
42
    D = as.numeric(data[,colnames(data)%in%gene])
43
  }
44
  
45
  bplot_list=lapply(logicalVectors, function(v){
46
    D[v]
47
  })
48
  names(bplot_list)=gsub("_", " ", namesLV)
49
  
50
  if(ORDER){
51
    ord=sapply(bplot_list, median)
52
    col=col[order(ord, decreasing = T)]
53
    bplot_list=bplot_list[order(ord, decreasing = T)]
54
    
55
  }
56
  
57
  plots=FUNCTION_PLOT_LIST(bplot_list, gene, col, ORDER, RANGE)
58
  return(plots)
59
}
60
61
62
FUNCTION_PLOT_LIST=function(bplot_list, GNAME, col, ORDER, RANGE){
63
  
64
  df=melt(bplot_list)
65
  
66
  df$class <- factor(df[,2], levels = unique(as.character(df[,2])),ordered = TRUE)
67
  
68
  df$Expression=as.numeric(as.vector(df[,1]))
69
  p <- ggplot(data=df, aes(x=class, y=Expression, color=class)) +  
70
    geom_jitter(width = 0.25, size = 0.1) +
71
    scale_color_manual(values = col)
72
  
73
  p2 <- p +
74
    
75
    #theme with white background
76
    theme_bw() +
77
    
78
    # titles
79
    theme(plot.title = element_text(face="italic", color="black", size=16, hjust=0)) +
80
    theme(axis.title = element_text(color="black", face=NULL, size=12,angle = 90)) +
81
    theme(axis.title.y = element_text(size = 14, angle = 90, color="black", face=NULL)) +
82
    guides(color = FALSE) +
83
    
84
    ylab("Expression (log2)") +
85
    xlab("") +
86
    labs(title=GNAME) +
87
    #eliminates background, gridlines, and chart border
88
    theme(plot.background = element_blank(),
89
          panel.grid.major = element_blank(),
90
          panel.grid.minor = element_blank())+
91
    theme(panel.border= element_blank())+
92
93
    #draws x and y axis line
94
    theme(axis.line = element_line(),
95
          axis.line.x = element_line(color="black", size = 0.5),
96
          axis.line.y = element_line(color="black", size = 0.5)) +
97
    
98
    # X - axis text
99
    theme(axis.text.x = element_text(angle=45, hjust=1, color="black", size = 14, face=NULL),
100
          axis.text.y = element_text(hjust=1, color="black", size = 12, face=NULL))+ 
101
    
102
    # if want to limit to range
103
    if(!is.null(RANGE))scale_y_continuous(breaks=seq(2,14,2), limits = RANGE)
104
  
105
  return(p2)
106
}
107
108
109
boxplots_grid_topcga_subtypes <- function(x){
110
  p.all=lapply(c("MAGEC1", "MAGEC2", "MORC1", "DSCR8", "MAGEB1", "MAGEB2", "ADAM29", "DMRT1", "SAGE1"), FUN_PLOT, logicalVectors, namesLV=names(logicalVectors), data=data, ORDER=F)
111
  ggsave(paste0(GENELIST, ".pdf"), do.call(marrangeGrob, append(list(grobs=p.all, nrow=3, ncol=3),list(top=NULL))), width = 350 , height = 250, units = "mm", dpi=250)
112
}
113
114
115
# make logical vector with cancer subtypes and healthy sample
116
annot$logicalvector <- annot$Sample.type
117
annot$logicalvector[annot$Sample.type!="NonCancerHealthy"] <- annot$Category.specifying.subtype[annot$Sample.type!="NonCancerHealthy"]
118
annot$logicalvector[annot$Sample.type=="NonCancerHealthy"] <- "Healthy"
119
annot$logicalvector[annot$Category.specifying.lineage.tumor.origin=="AML"] <- "AML"
120
annot$logicalvector[annot$Category.specifying.lineage.tumor.origin=="MDS"] <- "MDS"
121
annot$logicalvector[annot$Category.specifying.lineage.tumor.origin=="CLL"] <- "CLL"
122
annot$logicalvector[annot$Category.specifying.subtype=="AILT"] <- "AITL"
123
annot$logicalvector[annot$Category.specifying.subtype=="LC"] <- "LCH"
124
125
# plot selected genes 
126
logicalVectors=get.logical(annovector = list(annot$logicalvector), filterv = annot$Sample.type%in%c("Cancer", "Prolif", "NonCancerHealthy"), PREFIX = "")
127
logicalVectors=logicalVectors[paste0(c("MM", "DLBCL", "MCL", "CHL", "FL", "MALT", "ALCL", "PTCLNOS", "AITL", "T-ALL", "pre-B-ALL", "AML", "MDS", "LCH", "CML", "CLL", "Healthy"), "_")]
128
GENELIST="FigureS6C_Hemap_CGA_dotplots"
129
boxplots_grid_topcga_subtypes()