|
a |
|
b/FigS6C_Hemap_CGA_dotplots.R |
|
|
1 |
# Base directory of the repository checkout used to locate shared scripts.
GIT_HOME <- "/research/users/ppolonen/git_home/ImmunogenomicLandscape-BloodCancers/"

# Pull in the shared plotting helpers
# (presumably also the source of get.logical() used below -- TODO confirm).
source(
  file.path(GIT_HOME, "common_scripts/visualisation/plotting_functions.R")
)
|
|
3 |
|
|
|
4 |
# Plot CGA expression dot plots (Figure S6C)
|
|
5 |
|
|
|
6 |
library(parallel) |
|
|
7 |
library(gridExtra) |
|
|
8 |
library(ggplot2) |
|
|
9 |
library(ggrepel) |
|
|
10 |
library(cowplot) |
|
|
11 |
|
|
|
12 |
# Hemap gene expression data. load() returns the name of the stored object,
# which get() then resolves to the object itself.
data <- get(load("data9544_with_gene_symbols.RData"))

# Hemap sample annotations, loaded the same way.
annot <- get(load("Hemap_immunology_Annotations.Rdata"))

# Keep (and order) the expression rows by the first annotation column.
data <- data[annot[, 1], ]
|
|
19 |
|
|
|
20 |
# plotting functions |
|
|
21 |
# Build a per-group expression dot plot for a single gene.
#
# Arguments:
#   gene            Gene identifier. With `data` it is matched against
#                   colnames(data); with `matrix` it is looked up with grep.
#   logicalVectors  Named list of index vectors, one per sample group, used to
#                   subset the expression values.
#   namesLV         Group labels; "Cancer_" and "_" are replaced by spaces
#                   before they are used as x-axis labels.
#   data            Optional expression table with genes in columns. Takes
#                   precedence over `matrix` when both are given.
#   matrix          Optional path to a feature-matrix file searched with grep.
#   col             Optional colours, one per group. When NULL they are looked
#                   up in "colors_hemap_immunology.tsv".
#   ORDER           If TRUE, groups are sorted by decreasing median expression.
#   RANGE           Optional y-axis limits forwarded to FUNCTION_PLOT_LIST.
#
# Returns the value of FUNCTION_PLOT_LIST() (a ggplot object).
FUN_PLOT <- function(gene, logicalVectors, namesLV, data = NULL, matrix = NULL,
                     col = NULL, ORDER = FALSE, RANGE = NULL) {
  # Scalar condition: use the short-circuit operator.
  if (is.null(matrix) && is.null(data)) {
    stop("No data to plot, check data/matrix")
  }

  # Human-readable title: strip feature-matrix decorations from the gene id.
  GNAME <- gsub("N:....:|:::::|DUFVA_", "", gene)
  GNAME <- gsub("_", " ", GNAME)
  namesLV <- gsub("Cancer_", " ", namesLV)

  # Only touch the colour table when the caller did not supply colours.
  if (is.null(col)) {
    cols <- read.table("colors_hemap_immunology.tsv", header = TRUE,
                       sep = "\t", comment.char = " ")
    # Map group names onto the sample names used in the colour table.
    samples <- gsub("_", "", names(logicalVectors))
    samples <- gsub("Healthy", "NonCancerHealthy", samples)
    samples <- gsub("FL|MALT", "BCL", samples)
    samples <- gsub("AITL|PTCLNOS|ALCL", "TCL", samples)
    samples <- gsub("LCH", "MDS", samples)
    col <- as.character(cols[match(samples, cols$sample), 2])
  }

  if (!is.null(matrix)) {
    # Full feature names are passed through; bare symbols are wrapped as
    # quoted GEXP feature ids for the fixed-string, whole-word grep.
    gene2 <- ifelse(grepl("GEXP", gene), gene,
                    paste0("'N:GEXP:", gene, ":::::'"))
    D <- as.numeric(read.delim(pipe(paste0("grep -Fw ", gene2, " ", matrix)),
                               row.names = 1, header = F))
  }

  if (!is.null(data)) {
    gene_cols <- colnames(data) %in% gene
    if (!any(gene_cols)) {
      # Keep going (best effort), but make the empty panel explainable.
      warning("Gene '", gene, "' not found in colnames(data); ",
              "no expression values available for this plot")
    }
    D <- as.numeric(data[, gene_cols])
  }

  # One vector of expression values per group.
  bplot_list <- lapply(logicalVectors, function(v) D[v])
  names(bplot_list) <- gsub("_", " ", namesLV)

  if (ORDER) {
    # Compute the ordering once and apply it to colours and groups alike.
    by_median <- order(vapply(bplot_list, median, numeric(1)),
                       decreasing = TRUE)
    col <- col[by_median]
    bplot_list <- bplot_list[by_median]
  }

  # Use the cleaned name (not the raw id) as the plot title.
  FUNCTION_PLOT_LIST(bplot_list, GNAME, col, ORDER, RANGE)
}
|
|
60 |
|
|
|
61 |
|
|
|
62 |
# Draw a jittered dot plot from a named list of numeric vectors.
#
# Arguments:
#   bplot_list  Named list; each element holds the expression values of one
#               group. List order defines the x-axis order.
#   GNAME       Plot title.
#   col         Colours passed to scale_color_manual(), matched to the groups
#               in order.
#   ORDER       Unused here; kept so existing callers keep working.
#   RANGE       NULL, or y-axis limits. When given, breaks are fixed at
#               2, 4, ..., 14.
#
# Returns a ggplot object.
FUNCTION_PLOT_LIST <- function(bplot_list, GNAME, col, ORDER, RANGE) {
  # Long-format table (one row per value) built with base R, so the function
  # does not depend on a melt() implementation being attached.
  group_names <- names(bplot_list)
  if (is.null(group_names)) {
    group_names <- seq_along(bplot_list)
  }
  df <- data.frame(
    value = as.numeric(unlist(bplot_list, use.names = FALSE)),
    L1 = rep(group_names, lengths(bplot_list)),
    stringsAsFactors = FALSE
  )

  # Ordered factor keeps the groups in list order instead of alphabetical.
  df$class <- factor(df$L1, levels = unique(as.character(df$L1)),
                     ordered = TRUE)
  df$Expression <- df$value

  p <- ggplot(data = df, aes(x = class, y = Expression, color = class)) +
    geom_jitter(width = 0.25, size = 0.1) +
    scale_color_manual(values = col) +
    # white background
    theme_bw() +
    theme(
      # titles
      plot.title = element_text(face = "italic", color = "black", size = 16,
                                hjust = 0),
      axis.title = element_text(color = "black", size = 12, angle = 90),
      axis.title.y = element_text(size = 14, angle = 90, color = "black"),
      # no background, gridlines or panel border
      plot.background = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      # explicit x and y axis lines
      axis.line = element_line(),
      axis.line.x = element_line(color = "black", size = 0.5),
      axis.line.y = element_line(color = "black", size = 0.5),
      # axis tick labels
      axis.text.x = element_text(angle = 45, hjust = 1, color = "black",
                                 size = 14),
      axis.text.y = element_text(hjust = 1, color = "black", size = 12)
    ) +
    # colour duplicates the x axis, so suppress its legend
    guides(color = "none") +
    ylab("Expression (log2)") +
    xlab("") +
    labs(title = GNAME)

  # Optionally clamp the y axis to the requested range.
  if (!is.null(RANGE)) {
    p <- p + scale_y_continuous(breaks = seq(2, 14, 2), limits = RANGE)
  }

  p
}
|
|
107 |
|
|
|
108 |
|
|
|
109 |
# Plot the selected CGA genes as a 3 x 3 grid of dot plots and save it as
# "<GENELIST>.pdf". Reads `logicalVectors`, `data` and `GENELIST` from the
# enclosing (global) environment; the argument `x` is not used.
boxplots_grid_topcga_subtypes <- function(x) {
  cga_genes <- c("MAGEC1", "MAGEC2", "MORC1", "DSCR8", "MAGEB1", "MAGEB2",
                 "ADAM29", "DMRT1", "SAGE1")

  # One panel per gene, groups kept in the order of `logicalVectors`.
  panels <- lapply(cga_genes, function(g) {
    FUN_PLOT(g, logicalVectors, namesLV = names(logicalVectors), data = data,
             ORDER = FALSE)
  })

  # top = NULL suppresses the grid title.
  grid_page <- marrangeGrob(grobs = panels, nrow = 3, ncol = 3, top = NULL)
  ggsave(paste0(GENELIST, ".pdf"), grid_page, width = 350, height = 250,
         units = "mm", dpi = 250)
}
|
|
113 |
|
|
|
114 |
|
|
|
115 |
# Derive one plotting-group label per sample. The assignments below are
# order-dependent: later ones overwrite earlier ones.
annot$logicalvector <- annot$Sample.type

# Non-healthy samples take their subtype label; healthy samples are pooled.
annot$logicalvector[annot$Sample.type != "NonCancerHealthy"] <-
  annot$Category.specifying.subtype[annot$Sample.type != "NonCancerHealthy"]
annot$logicalvector[annot$Sample.type == "NonCancerHealthy"] <- "Healthy"

# AML, MDS and CLL are grouped by lineage / tumor origin instead of subtype.
annot$logicalvector[
  annot$Category.specifying.lineage.tumor.origin == "AML"
] <- "AML"
annot$logicalvector[
  annot$Category.specifying.lineage.tumor.origin == "MDS"
] <- "MDS"
annot$logicalvector[
  annot$Category.specifying.lineage.tumor.origin == "CLL"
] <- "CLL"

# Relabel two subtype codes to the names used in the figure.
annot$logicalvector[annot$Category.specifying.subtype == "AILT"] <- "AITL"
annot$logicalvector[annot$Category.specifying.subtype == "LC"] <- "LCH"

# Output file stem for the figure.
GENELIST <- "FigureS6C_Hemap_CGA_dotplots"

# One index vector per group, restricted to the listed sample types.
logicalVectors <- get.logical(
  annovector = list(annot$logicalvector),
  filterv = annot$Sample.type %in% c("Cancer", "Prolif", "NonCancerHealthy"),
  PREFIX = ""
)

# Fix the left-to-right group order of the plot; the names carry a trailing
# "_", hence the paste0().
logicalVectors <- logicalVectors[paste0(
  c("MM", "DLBCL", "MCL", "CHL", "FL", "MALT", "ALCL", "PTCLNOS", "AITL",
    "T-ALL", "pre-B-ALL", "AML", "MDS", "LCH", "CML", "CLL", "Healthy"),
  "_"
)]

# plot selected genes
boxplots_grid_topcga_subtypes()