|
a |
|
b/R/compDrugsen.R |
|
|
1 |
#' @name compDrugsen |
|
|
2 |
#' @title Comparison of drug sensitivity |
|
|
3 |
#' @description This function estimates the IC50 of specific drugs for each Subtype by developing a ridge regression predictive model based on all/specific cell lines derived from Genomics of Drug Sensitivity in Cancer (GDSC, \url{https://www.cancerrxgene.org/}). |
|
|
4 |
#' @param moic.res An object returned by `getMOIC()` with one specified algorithm or `get\%algorithm_name\%` or `getConsensusMOIC()` with a list of multiple algorithms. |
|
|
5 |
#' @param norm.expr A matrix of normalized expression data with rows for genes and columns for samples; FPKM or TPM without log2 transformation is recommended. |
|
|
6 |
#' @param drugs A string vector to indicate the names of the drugs for which you would like to predict sensitivity, one of Erlotinib, Rapamycin, Sunitinib, PHA-665752, MG-132, Paclitaxel, Cyclopamine, AZ628, Sorafenib, VX-680, Imatinib, TAE684, Crizotinib, Saracatinib, S-Trityl-L-cysteine, Z-LLNle-CHO, Dasatinib, GNF-2, CGP-60474, CGP-082996, A-770041, WH-4-023, WZ-1-84, BI-2536, BMS-536924, BMS-509744, CMK, Pyrimethamine, JW-7-52-1, A-443654, GW843682X, MS-275, Parthenolide, KIN001-135, TGX221, Bortezomib, XMD8-85, Roscovitine, Salubrinal, Lapatinib, GSK269962A, Doxorubicin, Etoposide, Gemcitabine, Mitomycin C, Vinorelbine, NSC-87877, Bicalutamide, QS11, CP466722, Midostaurin, CHIR-99021, AP-24534, AZD6482, JNK-9L, PF-562271, HG-6-64-1, JQ1, JQ12, DMOG, FTI-277, OSU-03012, Shikonin, AKT inhibitor VIII, Embelin, FH535, PAC-1, IPA-3, GSK-650394, BAY 61-3606, 5-Fluorouracil, Thapsigargin, Obatoclax Mesylate, BMS-754807, Lisitinib, Bexarotene, Bleomycin, LFM-A13, GW-2580, AUY922, Phenformin, Bryostatin 1, Pazopanib, LAQ824, Epothilone B, GSK1904529A, BMS345541, Tipifarnib, BMS-708163, Ruxolitinib, AS601245, Ispinesib Mesylate, TL-2-105, AT-7519, TAK-715, BX-912, ZSTK474, AS605240, Genentech Cpd 10, GSK1070916, KIN001-102, LY317615, GSK429286A, FMK, QL-XII-47, CAL-101, UNC0638, XL-184, WZ3105, XMD14-99, AC220, CP724714, JW-7-24-1, NPK76-II-72-1, STF-62247, NG-25, TL-1-85, VX-11e, FR-180204, Tubastatin A, Zibotentan, YM155, NSC-207895, VNLG/124, AR-42, CUDC-101, Belinostat, I-BET-762, CAY10603, Linifanib , BIX02189, CH5424802, EKB-569, GSK2126458, KIN001-236, KIN001-244, KIN001-055, KIN001-260, KIN001-266, Masitinib, MP470, MPS-1-IN-1, BHG712, OSI-930, OSI-027, CX-5461, PHA-793887, PI-103, PIK-93, SB52334, TPCA-1, TG101348, Foretinib, Y-39983, YM201636, Tivozanib, GSK690693, SNX-2112, QL-XI-92, XMD13-2, QL-X-138, XMD15-27; two common chemodrugs (i.e., Cisplatin and Paclitaxel) will be analyzed by default if no indication. |
|
|
7 |
#' @param tissueType A string value to specify if you would like to train the models on only a subset of the CGP cell lines (based on the tissue type from which the cell lines originated); Allowed values contain c("all", "aero_digestive_tract", "blood", "bone", "breast", "digestive_system", "lung", "nervous_system", "skin", "urogenital_system") and "all" by default. |
|
|
8 |
#' @param clust.col A string vector storing colors for annotating each Subtype. |
|
|
9 |
#' @param prefix A string value to indicate the prefix of the output plot. |
|
|
10 |
#' @param fig.path A string value to indicate the output path for storing the boxviolin plot. |
|
|
11 |
#' @param seed A integer value to indicate the seed for reproducing ridge regression. |
|
|
12 |
#' @param width A numeric value to indicate the width of boxviolin plot. |
|
|
13 |
#' @param height A numeric value to indicate the height of boxviolin plot. |
|
|
14 |
#' @param test.method A string value to indicate the method for statistical testing. Allowed values contain c('nonparametric', 'parametric'); nonparametric means two-sample wilcoxon rank sum test for two subtypes and Kruskal-Wallis rank sum test for multiple subtypes; parametric means two-sample t-test when only two subtypes are identified, and anova for multiple subtypes comparison; "nonparametric" by default. |
|
|
15 |
#' @return Data.frame(s) storing the estimated IC50 of specified drugs per sample within each Subtype. |
|
|
16 |
#' @export |
|
|
17 |
#' @import ggplot2 |
|
|
18 |
#' @importFrom ggpubr stat_compare_means |
|
|
19 |
#' @references Geeleher P, Cox N, Huang R S. (2014). pRRophetic: an R package for prediction of clinical chemotherapeutic response from tumor gene expression levels. PLoS One, 9(9):e107468. |
|
|
20 |
#' |
|
|
21 |
#' Geeleher P, Cox N J, Huang R S. (2014). Clinical drug response can be predicted using baseline gene expression levels and in vitro drug sensitivity in cell lines. Genome Biol, 15(3):1-12. |
|
|
22 |
#' @examples # There is no example and please refer to vignette. |
|
|
23 |
compDrugsen <- function(moic.res = NULL, |
|
|
24 |
norm.expr = NULL, |
|
|
25 |
drugs = c("Cisplatin", "Paclitaxel"), |
|
|
26 |
tissueType = "all", |
|
|
27 |
test.method = "nonparametric", |
|
|
28 |
clust.col = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"), |
|
|
29 |
prefix = NULL, |
|
|
30 |
seed = 123456, |
|
|
31 |
fig.path = getwd(), |
|
|
32 |
width = 5, |
|
|
33 |
height = 5) { |
|
|
34 |
|
|
|
35 |
if(!is.element(test.method, c("nonparametric","parametric"))) { |
|
|
36 |
stop("test.method can be one of nonparametric or parametric.") |
|
|
37 |
} |
|
|
38 |
|
|
|
39 |
# data processing |
|
|
40 |
comsam <- intersect(moic.res$clust.res$samID, colnames(norm.expr)) |
|
|
41 |
# check data |
|
|
42 |
if(length(comsam) == nrow(moic.res$clust.res)) { |
|
|
43 |
message("--all samples matched.") |
|
|
44 |
} else { |
|
|
45 |
message(paste0("--",(nrow(moic.res$clust.res)-length(comsam))," samples mismatched from current subtypes.")) |
|
|
46 |
} |
|
|
47 |
moic.res$clust.res <- moic.res$clust.res[comsam,,drop = FALSE] |
|
|
48 |
norm.expr <- norm.expr[,comsam] |
|
|
49 |
|
|
|
50 |
n.moic <- length(unique(moic.res$clust.res$clust)) |
|
|
51 |
sam.order <- moic.res$clust.res[order(moic.res$clust.res$clust, decreasing = FALSE), "samID"] |
|
|
52 |
colvec <- clust.col[1:length(unique(moic.res$clust.res$clust))] |
|
|
53 |
names(colvec) <- paste0("CS",unique(moic.res$clust.res$clust)) |
|
|
54 |
|
|
|
55 |
annCol <- data.frame("Subtype" = paste0("CS",moic.res$clust.res[sam.order,"clust"]), |
|
|
56 |
samID = sam.order, |
|
|
57 |
row.names = sam.order, |
|
|
58 |
stringsAsFactors = FALSE) |
|
|
59 |
|
|
|
60 |
if(max(norm.expr) < 25 | (max(norm.expr) >= 25 & min(norm.expr) < 0)) { |
|
|
61 |
message("--expression profile seems to have veen standardised (z-score or log transformation), no more action will be performed.") |
|
|
62 |
gset <- norm.expr |
|
|
63 |
} |
|
|
64 |
if(max(norm.expr) >= 25 & min(norm.expr) >= 0){ |
|
|
65 |
message("--log2 transformation done for expression data.") |
|
|
66 |
gset <- log2(norm.expr + 1) |
|
|
67 |
} |
|
|
68 |
|
|
|
69 |
# drug sensitivity prediction |
|
|
70 |
predictedPtype <- predictedBoxdat <- list() |
|
|
71 |
|
|
|
72 |
for (drug in drugs) { |
|
|
73 |
set.seed(seed) |
|
|
74 |
|
|
|
75 |
predictedPtype[[drug]] <- quiet(pRRopheticPredict(testMatrix = as.matrix(gset[,rownames(annCol)]), |
|
|
76 |
drug = drug, |
|
|
77 |
tissueType = tissueType, |
|
|
78 |
dataset = "cgp2016", |
|
|
79 |
minNumSamples = 5, |
|
|
80 |
selection = 1)) # 1 indicate if multiple genes existed, mean value will be considered |
|
|
81 |
|
|
|
82 |
if(!all(names(predictedPtype[[drug]]) == rownames(annCol))) {stop("name mismatched!\n")} |
|
|
83 |
|
|
|
84 |
predictedBoxdat[[drug]] <- data.frame("Est.IC50" = predictedPtype[[drug]], |
|
|
85 |
"Subtype" = as.character(annCol$Subtype), |
|
|
86 |
row.names = names(predictedPtype[[drug]]), |
|
|
87 |
stringsAsFactors = FALSE) |
|
|
88 |
message(drug," done...") |
|
|
89 |
|
|
|
90 |
# generate boxviolin plot with statistical testing |
|
|
91 |
if(n.moic == 2 & test.method == "nonparametric") { |
|
|
92 |
statistic = "wilcox.test" |
|
|
93 |
ic50.test <- wilcox.test(predictedBoxdat[[drug]]$Est.IC50 ~ predictedBoxdat[[drug]]$Subtype)$p.value |
|
|
94 |
cat(paste0("Wilcoxon rank sum test p value = ", formatC(ic50.test, format = "e", digits = 2), " for ", drug)) |
|
|
95 |
} |
|
|
96 |
if(n.moic == 2 & test.method == "parametric") { |
|
|
97 |
statistic = "t.test" |
|
|
98 |
ic50.test <- t.test(predictedBoxdat[[drug]]$Est.IC50 ~ predictedBoxdat[[drug]]$Subtype)$p.value |
|
|
99 |
cat(paste0("Student's t test p value = ", formatC(ic50.test, format = "e", digits = 2), " for ", drug)) |
|
|
100 |
} |
|
|
101 |
if(n.moic > 2 & test.method == "nonparametric") { |
|
|
102 |
statistic = "kruskal.test" |
|
|
103 |
ic50.test <- kruskal.test(predictedBoxdat[[drug]]$Est.IC50 ~ predictedBoxdat[[drug]]$Subtype)$p.value |
|
|
104 |
pairwise.ic50.test <- pairwise.wilcox.test(predictedBoxdat[[drug]]$Est.IC50,predictedBoxdat[[drug]]$Subtype,p.adjust.method = "BH") |
|
|
105 |
cat(paste0(drug,": Kruskal-Wallis rank sum test p value = ", formatC(ic50.test, format = "e", digits = 2),"\npost-hoc pairwise wilcoxon rank sum test with Benjamini-Hochberg adjustment presents below:\n")) |
|
|
106 |
print(formatC(pairwise.ic50.test$p.value, format = "e", digits = 2)) |
|
|
107 |
} |
|
|
108 |
if(n.moic > 2 & test.method == "parametric") { |
|
|
109 |
statistic = "anova" |
|
|
110 |
ic50.test <- summary(aov(predictedBoxdat[[drug]]$Est.IC50 ~ predictedBoxdat[[drug]]$Subtype))[[1]][["Pr(>F)"]][1] |
|
|
111 |
pairwise.ic50.test <- pairwise.t.test(predictedBoxdat[[drug]]$Est.IC50,predictedBoxdat[[drug]]$Subtype,p.adjust.method = "BH") |
|
|
112 |
cat(paste0(drug,": One-way anova test p value = ", formatC(ic50.test, format = "e", digits = 2),"\npost-hoc pairwise Student's t test with Benjamini-Hochberg adjustment presents below:\n")) |
|
|
113 |
print(formatC(pairwise.ic50.test$p.value, format = "e", digits = 2)) |
|
|
114 |
} |
|
|
115 |
|
|
|
116 |
p <- ggplot(data = predictedBoxdat[[drug]], |
|
|
117 |
aes(x = Subtype, y = Est.IC50, fill = Subtype)) + |
|
|
118 |
scale_fill_manual(values = colvec) + |
|
|
119 |
geom_violin(alpha = 0.4, position = position_dodge(width = .75), |
|
|
120 |
size = 0.8, color = "black") + |
|
|
121 |
geom_boxplot(notch = TRUE, outlier.size = -1, |
|
|
122 |
color = "black", lwd = 0.8, alpha = 0.7) + |
|
|
123 |
geom_point(shape = 21, size = 2, |
|
|
124 |
position = position_jitterdodge(), |
|
|
125 |
color = "black", alpha = 1) + |
|
|
126 |
theme_classic() + |
|
|
127 |
ylab(bquote("Estimated IC"[50]~"of"~.(drug))) + xlab("") + |
|
|
128 |
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12), |
|
|
129 |
axis.ticks = element_line(size = 0.2, color = "black"), |
|
|
130 |
axis.ticks.length = unit(0.2, "cm"), |
|
|
131 |
legend.position = "none", |
|
|
132 |
axis.title = element_text(size = 15), |
|
|
133 |
axis.text = element_text(size = 10)) + |
|
|
134 |
# add statistical inference |
|
|
135 |
stat_compare_means(method = statistic, |
|
|
136 |
hjust = ifelse(n.moic %% 2 == 0, 0.5, 0), |
|
|
137 |
label.x = ifelse(n.moic %% 2 == 0, n.moic / 2 + 0.5, n.moic / 2), |
|
|
138 |
label.y = min(predictedBoxdat[[drug]]$Est.IC50)) |
|
|
139 |
|
|
|
140 |
# save to pdf |
|
|
141 |
if(is.null(prefix)) { |
|
|
142 |
outFig <- paste0("boxviolin of estimated ic50 for ",drug,".pdf") |
|
|
143 |
} else { |
|
|
144 |
outFig <- paste0(prefix, " for ", drug,".pdf") |
|
|
145 |
} |
|
|
146 |
ggsave(file.path(fig.path, outFig), width = width, height = height) |
|
|
147 |
# print to screen |
|
|
148 |
print(p) |
|
|
149 |
} |
|
|
150 |
return(predictedBoxdat) |
|
|
151 |
} |