Diff of /vignettes/src/part02.Rmd [000000] .. [226bc8]

Switch to side-by-side view

--- a
+++ b/vignettes/src/part02.Rmd
@@ -0,0 +1,487 @@
+---
+title: "Part 2"
+output:
+  BiocStyle::html_document
+---
+
+```{r, message=FALSE, include=!exists(".standalone"), eval=!exists(".standalone")}
+library("BloodCancerMultiOmics2017")
+library("reshape2") # melt
+library("Biobase")
+library("dplyr")
+library("RColorBrewer")
+library("ggplot2")
+library("ggdendro")
+library("gtable")
+library("grid")
+library("Rtsne")
+library("ggbeeswarm")
+```
+
+```{r echo=FALSE}
+plotDir = ifelse(exists(".standalone"), "", "part02/")
+if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)
+```
+
+```{r}
+options(stringsAsFactors=FALSE)
+```
+
+# Drug-induced effects on cell viability
+
+Loading the data.
+```{r}
+data("lpdAll")
+```
+
+Here we show a relative cell viability (as compared to negative control) under treatment with 64 drugs at 5 concentrations steps each.
+
+Prepare data.
+```{r}
+#select drug screening data on patient samples
+lpd <- lpdAll[fData(lpdAll)$type == "viab", pData(lpdAll)$Diagnosis != "hMNC"]
+viabTab <- Biobase::exprs(lpd)
+viabTab <- viabTab[,complete.cases(t(viabTab))]
+viabTab <- reshape2::melt(viabTab)
+viabTab$Concentration <- fData(lpd)[viabTab$Var1,"subtype"]
+viabTab <- viabTab[viabTab$Concentration %in% c("1","2","3","4","5"),]
+viabTab$drugName <- fData(lpd)[viabTab$Var1,"name"]
+viabTab <- viabTab[order(viabTab$Concentration),]
+
+#order drug by mean viablitity
+drugOrder <- group_by(viabTab, drugName) %>%
+  summarise(meanViab = mean(value)) %>%
+  arrange(meanViab)
+viabTab$drugName <- factor(viabTab$drugName, levels = drugOrder$drugName)
+```
+
+Scatter plot for viabilities and using colors for concentrations.
+```{r ViabilityScatter_main, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=10, fig.height=5, warning=FALSE, out.width=560, out.height=280}
+#FIG# S1
+
+#Color for each concentration
+colorCode <- rev(brewer.pal(7,"Blues")[3:7])
+names(colorCode) <- unique(viabTab$Concentration)
+
+ggplot(viabTab, aes(x=drugName,y=value, color=Concentration)) +
+  geom_jitter(size=1, na.rm = TRUE, alpha=0.8, shape =16) +
+  scale_color_manual(values = colorCode) +
+  ylab("Viability") + ylim(c(0,1.2)) + xlab("") +
+  guides(color = guide_legend(override.aes = list(size=3,alpha=1),
+                              title = "concentration index")) +
+  theme_bw() +
+  theme(legend.position = "bottom",
+        axis.text.x = element_text(angle = 90, hjust = 1, vjust=0.5),
+        legend.key = element_blank())
+```
+
+The plot shows high variability of effects between different drugs, from mostly lethal
+(left) to mostly neutral (right), concentration dependence of effects and high variability of effects of the same drug/concentration across patients.
+
+# Drug-drug correlation
+
+We compared response patterns produced by drugs using phenotype clustering within diseases (CLL, T-PLL and MCL) using Pearson correlation analysis. The results imply that the drug assays probe tumor cells’ specific dependencies on survival pathways.
+
+Loading the data.
+```{r}
+data("drpar", "patmeta", "drugs")
+```
+
+## Additional processing functions and parameters
+
+Function that return the subset of samples for a given diagnosis (or all samples if diag=NA).
+```{r}
+givePatientID4Diag = function(pts, diag=NA) {
+  pts = if(is.na(diag)) {
+    names(pts)
+  } else {
+    names(pts[patmeta[pts,"Diagnosis"]==diag])
+  }
+  pts
+}
+```
+
+Function that returns the viability matrix for given screen (for a given channel) for patients with given diagnosis.
+```{r}
+giveViabMatrix = function(diag, screen, chnnl) {
+  data = if(screen=="main") drpar
+          else print("Incorrect screen name.")
+  pid = colnames(data)
+  if(!is.na(diag))
+    pid = pid[patmeta[pid,"Diagnosis"]==diag]
+  
+  return(assayData(data)[[chnnl]][,pid])
+}
+```
+
+Color scales for the heat maps.
+```{r}
+palette.cor1 = c(rev(brewer.pal(9, "Blues"))[1:8],
+                 "white","white","white","white",brewer.pal(7, "Reds"))
+palette.cor2 = c(rev(brewer.pal(9, "Blues"))[1:8],
+                 "white","white","white","white",brewer.pal(7, "YlOrRd"))
+```
+
+## CLL/T-PLL
+
+Pearson correlation coefficients were calculated based on the mean drug response for the two lowest concentration steps in the main screen and across CLL and T-PLL samples separately. Square correlation matrices were plotted together, with CLL in lower triangle and T-PLL in upper triangle. The drugs in a heat map are ordered by hierarchical clustering applied to drug responses of CLL samples.
+
+```{r}
+main.cll.tpll = BloodCancerMultiOmics2017:::makeCorrHeatmap(
+  mt=giveViabMatrix(diag="CLL", screen="main", chnnl="viaraw.4_5"),
+  mt2=giveViabMatrix(diag="T-PLL", screen="main", chnnl="viaraw.4_5"),
+  colsc=palette.cor2, concNo="one")
+```
+
+```{r main.CLL.T-PLL, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=main.cll.tpll[["figure"]][["width"]], fig.height=main.cll.tpll[["figure"]][["height"]]}
+#FIG# 2A
+grid.draw(main.cll.tpll[["figure"]][["plot"]])
+```
+
+```{r main.cll.tpll.legend, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=main.cll.tpll[["legend"]][["width"]], fig.height=main.cll.tpll[["legend"]][["height"]], out.width=300, out.height=150}
+#FIG# 2A
+grid.draw(main.cll.tpll[["legend"]][["plot"]])
+```
+
+The major clusters in CLL include: kinase inhibitors targeting the B cell receptor, including idelalisib (PI3K), ibrutinib (BTK), duvelisib (PI3K), PRT062607 (SYK); inhibitors of redox signalling / reactive oxygen species (MIS−43, SD07, SD51); and BH3-mimetics (navitoclax, venetoclax).
+
+### Effect of drugs with similar target
+
+Here we compare the effect of drugs designed to target components of the same signalling pathway. 
+
+```{r}
+# select the data
+mtcll = as.data.frame(t(giveViabMatrix(diag="CLL",
+                                       screen="main",
+                                       chnnl="viaraw.4_5")))
+colnames(mtcll) = drugs[colnames(mtcll),"name"]
+
+# function which plots the scatter plot
+scatdr = function(drug1, drug2, coldot, mtNEW, min){
+  
+  dataNEW = mtNEW[,c(drug1, drug2)]
+  colnames(dataNEW) = c("A", "B")
+
+  p = ggplot(data=dataNEW,  aes(A,  B)) + geom_point(size=3, col=coldot, alpha=0.8) +
+    labs(x = drug1, y = drug2) + ylim(c(min, 1.35)) +  xlim(c(min, 1.35)) +
+    theme(panel.background = element_blank(),
+          axis.text = element_text(size = 15),
+          axis.title = element_text(size = rel(1.5)),
+          axis.line.x = element_line(colour = "black", size = 0.5),
+          axis.line.y = element_line(colour = "black", size = 0.5),
+          panel.grid.major = element_blank(),
+          panel.grid.minor = element_blank()) +
+    geom_smooth(method=lm) +
+    geom_text(x=1, y=min+0.1,
+              label=paste0("Pearson-R = ",
+                           round(cor(dataNEW$A, dataNEW$B ), 2)),
+              size = 5)
+  
+  return(p)
+}
+```
+
+
+```{r cor_scatter, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), , fig.width=6, fig.height=4, warning=FALSE, out.width=420, out.height=280}
+#FIG# 2A
+scatdr("ibrutinib", "spebrutinib", coldot="deeppink1", mtNEW=mtcll, min=0.4)
+scatdr("ibrutinib", "PRT062607 HCl", coldot="deeppink1", mtNEW=mtcll, min=0.4)
+scatdr("ibrutinib", "idelalisib", coldot="deeppink1", mtNEW=mtcll, min=0.4)
+scatdr("venetoclax", "navitoclax", coldot="goldenrod2", mtNEW=mtcll, min=0.2)
+scatdr("SD51", "MIS-43", coldot="dodgerblue3", mtNEW=mtcll, min=0.2)
+```
+
+
+## T-PLL
+
+Pearson correlation coefficients were calculated based on the mean drug response for the two lowest concentration steps in the main screen across T-PLL samples.
+
+```{r}
+main.tpll = BloodCancerMultiOmics2017:::makeCorrHeatmap(
+  mt=giveViabMatrix(diag="T-PLL", screen="main", chnnl="viaraw.4_5"),
+  colsc=palette.cor1, concNo="one")
+```
+
+```{r main.T-PLL, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=main.tpll[["figure"]][["width"]], fig.height=main.tpll[["figure"]][["height"]]}
+#FIG# S6 B
+grid.draw(main.tpll[["figure"]][["plot"]])
+```
+
+```{r main.tpll.legend, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=main.tpll[["legend"]][["width"]], fig.height=main.tpll[["legend"]][["height"]], out.width=300, out.height=150}
+#FIG# S6 B
+grid.draw(main.tpll[["legend"]][["plot"]])
+```
+
+Clusters of drugs with high correlation and anti-correlation are shown by red and blue squares, respectively.
+
+Inhibitors of redox signaling / reactive oxygen species (MIS-43, SD07, SD51) are clustering together. Otherwise, in T-PLL samples correlations are not well pronounced.
+
+## MCL
+
+Pearson correlation coefficients were calculated based on the mean drug response for the two lowest concentration steps in the main screen across MCL samples.
+
+```{r}
+main.mcl = BloodCancerMultiOmics2017:::makeCorrHeatmap(
+  mt=giveViabMatrix(diag="MCL", screen="main", chnnl="viaraw.4_5"),
+  colsc=palette.cor1, concNo="one")
+```
+
+```{r main.MCL, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=main.mcl[["figure"]][["width"]], fig.height=main.mcl[["figure"]][["height"]]}
+#FIG# S6 A
+grid.draw(main.mcl[["figure"]][["plot"]])
+```
+
+```{r main.mcl.legend, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=main.mcl[["legend"]][["width"]], fig.height=main.mcl[["legend"]][["height"]], out.width=300, out.height=150}
+#FIG# S6 A
+grid.draw(main.mcl[["legend"]][["plot"]])
+```
+
+Clusters of drugs with high correlation and anti-correlation are shown by red and blue squares, respectively.
+
+The major clusters include: kinase inhibitors of the B cell receptor, incl. idelalisib (PI3K), ibrutinib (BTK), duvelisib (PI3K), PRT062607 (SYK); inhibitors of redox signaling / reactive oxygen species (MIS-43, SD07, SD51) and BH3 mimetics (navitoclax, venetoclax).
+
+
+# Disease-specific drug response phenotypes
+
+Loading the data.
+```{r}
+data(list=c("lpdAll", "conctab", "patmeta"))
+```
+
+Preprocessing of drug screen data.
+```{r}
+#Select rows contain drug response data
+lpdSub <- lpdAll[fData(lpdAll)$type == "viab",]
+
+#Only use samples with complete values
+lpdSub <- lpdSub[,complete.cases(t(Biobase::exprs(lpdSub)))]
+
+#Transformation of the values
+Biobase::exprs(lpdSub) <- log(Biobase::exprs(lpdSub))
+Biobase::exprs(lpdSub) <- t(scale(t(Biobase::exprs(lpdSub))))
+
+#annotation for drug ID
+anno <- sprintf("%s(%s)",fData(lpdSub)$name,fData(lpdSub)$subtype)
+names(anno) <- rownames(lpdSub)
+```
+
+Function to run t-SNE.
+```{r}
+tsneRun <- function(distMat,perplexity=10,theta=0,max_iter=5000, seed = 1000) {
+  set.seed(seed)
+  tsneRes <- Rtsne(distMat, perplexity = perplexity, theta = theta, 
+                   max_iter = max_iter, is_distance = TRUE, dims =2)
+  tsneRes <- tsneRes$Y
+  rownames(tsneRes) <- labels(distMat)
+  colnames(tsneRes) <- c("x","y")
+  tsneRes
+}
+```
+
+Setting color scheme for the plot.
+```{r}
+colDiagFill = c(`CLL` = "grey80",
+    `U-CLL` = "grey80",
+    `B-PLL`="grey80",
+    `T-PLL`="#cc5352",
+    `Sezary`="#cc5352",
+    `PTCL-NOS`="#cc5352",
+    `HCL`="#b29441",
+    `HCL-V`="mediumaquamarine",
+    `AML`="#addbaf",
+    `MCL`="#8e65ca",
+    `MZL`="#c95e9e",
+    `FL`="darkorchid4",
+    `LPL`="#6295cd",
+    `hMNC`="pink")
+
+colDiagBorder <- colDiagFill
+colDiagBorder["U-CLL"] <- "black"
+```
+
+Sample annotation.
+```{r}
+annoDiagNew <- function(patList, lpdObj = lpdSub) {
+  Diagnosis <- pData(lpdObj)[patList,c("Diagnosis","IGHV Uppsala U/M")]
+  DiagNew <- c()
+  
+  for (i in seq(1:nrow(Diagnosis))) {
+   if (Diagnosis[i,1] == "CLL") {
+      if (is.na(Diagnosis[i,2])) {
+        DiagNew <- c(DiagNew,"CLL")
+      } else if (Diagnosis[i,2] == "U") {
+        DiagNew <- c(DiagNew,sprintf("%s-%s",Diagnosis[i,2],Diagnosis[i,1]))
+      } else if (Diagnosis[i,2] == "M") {
+        DiagNew <- c(DiagNew,"CLL")
+      }
+    } else DiagNew <- c(DiagNew,Diagnosis[i,1])
+  }
+  DiagNew
+}
+```
+
+Calculate t-SNE and prepare data for plotting the result.
+```{r}
+#prepare distance matrix
+distLpd <- dist(t(Biobase::exprs(lpdSub)))
+
+#run t-SNE
+plotTab <- data.frame(tsneRun(distLpd,perplexity=25, max_iter=5000, seed=338))
+
+#annotated patient sample
+plotTab$Diagnosis <- pData(lpdSub[,rownames(plotTab)])$Diagnosis
+plotTab$Diagnosis <- annoDiagNew(rownames(plotTab,lpdSub)) #consider IGHV status
+plotTab$Diagnosis <- factor(plotTab$Diagnosis,levels = names(colDiagFill))
+```
+
+```{r tSNE, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=10, fig.height=8, warning=FALSE, out.width=600, out.height=480}
+#FIG# 2 C
+p <- (ggplot(plotTab, aes(x=x,y=y)) +
+        geom_point(size=3, shape= 21, aes(col = Diagnosis, fill = Diagnosis)) +
+        theme_classic() +
+        theme(axis.ticks=element_line(color="black",size=0.5),
+              text=element_text(size=20),
+              axis.line.x = element_line(color="black",size=0.5),
+              axis.line.y = element_line(color="black",size=0.5),
+              legend.position="right") +
+        scale_fill_manual(values = colDiagFill) +
+        scale_color_manual(values = colDiagBorder) +
+        xlab("Component 1") + ylab("Component 2")) +
+  coord_cartesian(xlim = c(-20,20),ylim=c(-20,20))
+
+print(p)
+```
+
+## Example: dose-response curves
+
+Here we show dose-response curve for selected drugs and patients.
+
+First, change concentration index into real concentrations according to `conctab`.
+```{r}
+lpdPlot <- lpdAll[fData(lpdAll)$type == "viab",]
+concList <- c()
+for (drugID in rownames(fData(lpdPlot))) {
+  concIndex <- as.character(fData(lpdPlot)[drugID,"subtype"])
+  concSplit <- unlist(strsplit(as.character(concIndex),":"))
+  ID <- substr(drugID,1,5)
+  if (length(concSplit) == 1) {
+    realConc <- conctab[ID,as.integer(concSplit)]
+    concList <- c(concList,realConc)
+  } else {
+    realConc <- sprintf("%s:%s",
+                        conctab[ID,as.integer(concSplit[1])],
+                        conctab[ID,as.integer(concSplit[2])])
+    concList <- c(concList,realConc)
+  }
+}
+
+fData(lpdPlot)$concValue <- concList
+lpdPlot <- lpdPlot[,complete.cases(t(Biobase::exprs(lpdPlot)))]
+```
+
+Select drugs and samples.
+```{r}
+patDiag <- c("CLL","T-PLL","HCL","MCL")
+drugID <- c("D_012_5","D_017_4","D_039_3","D_040_5","D_081_4","D_083_5")
+
+lpdBee <- lpdPlot[drugID,pData(lpdPlot)$Diagnosis %in% patDiag]
+```
+
+Prepare the data for plot
+```{r}
+lpdCurve <-
+  lpdPlot[fData(lpdPlot)$name %in% fData(lpdBee)$name,
+          pData(lpdPlot)$Diagnosis %in% patDiag]
+lpdCurve <- lpdCurve[fData(lpdCurve)$subtype %in% seq(1,5),]
+dataCurve <- data.frame(Biobase::exprs(lpdCurve))
+dataCurve <- cbind(dataCurve,fData(lpdCurve)[,c("name","concValue")])
+tabCurve <- melt(dataCurve,
+                 id.vars = c("name","concValue"), variable.name = "patID")
+tabCurve$Diagnosis <- factor(pData(lpdCurve[,tabCurve$patID])$Diagnosis,
+                             levels = patDiag)
+tabCurve$value <- tabCurve$value
+tabCurve$concValue <- as.numeric(tabCurve$concValue)
+
+# set order
+tabCurve$name <- factor(tabCurve$name, levels = fData(lpdBee)$name)
+
+#calculate the mean and mse for each drug+cencentration in different disease
+tabGroup <- group_by(tabCurve,name,concValue,Diagnosis)
+tabSum <- summarise(tabGroup,meanViab = mean(value))
+```
+
+
+Finally, plot dose-response curve for each selected drug.
+```{r viabilityCurve, fig.path=plotDir, dev=c("png", "pdf"), fig.width=4, fig.height=3}
+#FIG# 2 C
+tconc = expression("Concentration [" * mu * "M]")
+fmt_dcimals <- function(decimals=0){
+   # return a function responpsible for formatting the 
+   # axis labels with a given number of decimals 
+   function(x) as.character(round(x,decimals))
+}
+
+for (drugName in unique(tabSum$name)) {
+  tabDrug <- filter(tabSum, name == drugName)
+  p <- (ggplot(data=tabDrug, aes(x=concValue,y=meanViab, col=Diagnosis)) +
+          geom_line() + geom_point(pch=16, size=4) +
+          scale_color_manual(values = colDiagFill[patDiag])
+        + theme_classic() +
+          theme(panel.border=element_blank(),
+                axis.line.x=element_line(size=0.5,
+                                         linetype="solid", colour="black"),
+                axis.line.y = element_line(size = 0.5,
+                                           linetype="solid", colour="black"),
+                legend.position="none",
+                plot.title = element_text(hjust = 0.5, size=20),
+                axis.text = element_text(size=15),
+                axis.title = element_text(size=20)) +
+          ylab("Viability") + xlab(tconc) + ggtitle(drugName) +
+          scale_x_log10(labels=fmt_dcimals(2)) +
+          scale_y_continuous(limits = c(0,1.3), breaks = seq(0,1.3,0.2)))
+  plot(p)
+}
+```
+
+## Example: drug effects as bee swarms
+
+```{r viabilityBee, fig.path=plotDir, dev=c("png", "pdf"), fig.width=5, fig.height=10, warning=FALSE}
+#FIG# 2 D
+lpdDiag <- lpdAll[,pData(lpdAll)$Diagnosis %in% c("CLL", "MCL", "HCL", "T-PLL")]
+dr <- c("D_012_5", "D_083_5", "D_081_3", "D_040_4", "D_039_3")
+
+m <- data.frame(t(Biobase::exprs(lpdDiag)[dr, ]), diag=pData(lpdDiag)$Diagnosis)
+m <- melt(m)
+m$lable <- 1
+for (i in 1:nrow(m )) {
+  m[i, "lable"] <- giveDrugLabel(as.character(m[i, "variable"]), conctab, drugs)
+} 
+
+  
+ggplot( m, aes(diag, value, color=factor(diag) ) ) +
+  ylim(0,1.3) + ylab("Viability") + 
+  xlab("") +
+  geom_boxplot(outlier.shape = NA) +
+  geom_beeswarm(cex=1.4, size=1.4,alpha=0.5, color="grey80") +
+  scale_color_manual("diagnosis", values=c(colDiagFill["CLL"], colDiagFill["MCL"], 
+                                           colDiagFill["HCL"], colDiagFill["T-PLL"])) +
+  theme_bw() +
+  theme(legend.position="right") +
+  theme(
+    panel.background =  element_blank(), 
+    panel.grid.minor.x =  element_blank(),
+    axis.text = element_text(size=15),
+    axis.title = element_text(size=15),
+    strip.text = element_text(size=15)
+  ) +
+  facet_wrap(~ lable, ncol=1) 
+```
+
+
+```{r, include=!exists(".standalone"), eval=!exists(".standalone")}
+sessionInfo()
+```
+
+```{r, message=FALSE, warning=FALSE, include=FALSE}
+rm(list=ls())
+```