Diff of /vignettes/src/part11.Rmd [000000] .. [226bc8]

Switch to unified view

a b/vignettes/src/part11.Rmd
1
---
2
title: "Part 11"
3
output:
4
  BiocStyle::html_document
5
---
6
7
```{r, message=FALSE, include=!exists(".standalone"), eval=!exists(".standalone")}
8
library("BloodCancerMultiOmics2017")
9
library("Biobase")
10
library("SummarizedExperiment")
11
library("AnnotationDbi")
12
library("org.Hs.eg.db")
13
library("dplyr")
14
library("abind")
15
library("reshape2")
16
library("RColorBrewer")
17
library("glmnet")
18
library("ipflasso")
19
library("ggplot2")
20
library("grid")
21
library("DESeq2")
22
```
23
24
```{r echo=FALSE}
25
plotDir = ifelse(exists(".standalone"), "", "part11/")
26
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)
27
```
28
29
```{r}
30
options(stringsAsFactors=FALSE)
31
```
32
33
34
# Drug response prediction
35
36
Drug response heterogeneity is caused by the unique deregulations in biology of the tumor cell. Those deregulations leave trace on the different molecular levels and have a various impact on cell's drug sensitivity profile. Here we use multivariate regression to integrate information from the multi-omic data in order to predict drug response profiles of the CLL samples.
37
38
Loading the data.
39
```{r}
40
data(list=c("conctab", "drpar", "drugs", "patmeta", "lpdAll", "dds", "mutCOM",
41
"methData"))
42
```
43
44
45
## Assesment of omics capacity in explaining drug response
46
47
### Data pre-processing
48
49
Filtering steps and transformations.
50
```{r}
51
e<-dds
52
colnames(e)<-colData(e)$PatID
53
54
55
#only consider CLL patients
56
CLLPatients<-rownames(patmeta)[which(patmeta$Diagnosis=="CLL")]
57
58
#Methylation Data
59
methData = t(assay(methData)) 
60
61
62
#RNA Data
63
eCLL<-e[,colnames(e) %in% CLLPatients]
64
###
65
#filter out genes without gene namce
66
AnnotationDF<-data.frame(EnsembleId=rownames(eCLL),stringsAsFactors=FALSE)
67
AnnotationDF$symbol <- mapIds(org.Hs.eg.db,
68
                     keys=rownames(eCLL),
69
                     column="SYMBOL",
70
                     keytype="ENSEMBL",
71
                     multiVals="first")
72
eCLL<-eCLL[AnnotationDF$EnsembleId[!is.na(AnnotationDF$symbol)],]
73
74
#filter out low count genes
75
###
76
minrs <- 100
77
rs  <- rowSums(assay(eCLL))
78
eCLL<-eCLL[ rs >= minrs, ]
79
#variance stabilize the data
80
#(includes normalizing for library size and dispsersion estimation) 
81
vstCounts<-varianceStabilizingTransformation(eCLL)
82
vstCounts<-assay(vstCounts)
83
#no NAs in data
84
any(is.na(vstCounts))
85
86
#filter out low variable genes
87
ntop<-5000
88
vstCountsFiltered<-vstCounts[order(apply(vstCounts, 1, var, na.rm=T),
89
                                   decreasing = T)[1:ntop],]
90
eData<-t(vstCountsFiltered)
91
#no NAs
92
any(is.na(eData))
93
94
#genetics
95
#remove features with less than 5 occurences
96
mutCOMbinary<-channel(mutCOM, "binary")
97
mutCOMbinary<-mutCOMbinary[featureNames(mutCOMbinary) %in% CLLPatients,]
98
genData<-Biobase::exprs(mutCOMbinary)
99
idx <- which(colnames(genData) %in% c("del13q14_bi", "del13q14_mono"))
100
genData <- genData[,-idx]
101
colnames(genData)[which(colnames(genData)=="del13q14_any")] = "del13q14"
102
minObs <- 5
103
#remove feutes with less than 5 occurecnes
104
genData<-genData[,colSums(genData, na.rm=T)>=minObs]
105
106
#IGHV
107
translation <- c(`U` = 0, `M` = 1)
108
stopifnot(all(patmeta$IGHV %in% c("U","M", NA)))
109
IGHVData <- matrix(translation[patmeta$IGHV], 
110
                   dimnames = list(rownames(patmeta), "IGHV"), ncol = 1)
111
IGHVData<-IGHVData[rownames(IGHVData) %in% CLLPatients,,drop=F]
112
#remove patiente with NA IGHV status
113
IGHVData<-IGHVData[!is.na(IGHVData), ,drop=F]
114
any(is.na(IGHVData))
115
116
#demographics (age and sex)
117
patmeta<-subset(patmeta, Diagnosis=="CLL")
118
gender <- ifelse(patmeta[,"Gender"]=="m",0,1)
119
120
121
# impute missing values in age by mean
122
ImputeByMean <- function(x) {x[is.na(x)] <- mean(x, na.rm=TRUE); return(x)}
123
age<-ImputeByMean(patmeta[,"Age4Main"])
124
125
126
demogrData <- cbind(age=age,gender=gender)
127
rownames(demogrData) <- rownames(patmeta)
128
129
#Pretreatment
130
pretreated<-patmeta[,"IC50beforeTreatment", drop=FALSE]
131
132
##### drug viabilites
133
summaries <- c(paste("viaraw", 1:5, sep=".") %>% `names<-`(paste(1:5)), 
134
               `4:5` = "viaraw.4_5", `1:5` = "viaraw.1_5")
135
a <- do.call( abind, c( along=3, lapply( summaries, 
136
                                         function(x) assayData(drpar)[[x]]))) 
137
dimnames(a)[[3]] <- names(summaries)
138
names(dimnames(a)) <- c( "drug", "patient", "summary" )
139
viabData <- acast( melt(a), patient ~ drug + summary )
140
rownames(viabData)<-c(substr(rownames(viabData),1,4)[1:3],
141
                      substr(rownames(viabData),1,5)[4:nrow(viabData)])
142
```
143
144
145
Check overlap of data and take care of missing values present in methylation and genetic data.
146
```{r}
147
# common patients 
148
Xlist<-list(RNA=eData, meth=methData, gen=genData, IGHV=IGHVData,
149
            demographics=demogrData, drugs=viabData, pretreated=pretreated)
150
PatientsPerOmic<-lapply(Xlist, rownames)
151
sapply(PatientsPerOmic, length)
152
153
allPatients<-Reduce(union, PatientsPerOmic)
154
PatientOverview<-sapply(Xlist, function(M) allPatients %in% rownames(M))
155
Patients <- (1:nrow(PatientOverview))
156
Omics <- (1:ncol(PatientOverview))
157
image(Patients,Omics, PatientOverview*1, axes=F, col=c("white", "black"),
158
      main="Sample overview across omics")
159
axis(2, at = 1:ncol(PatientOverview), labels=colnames(PatientOverview), tick=F)
160
161
commonPatients<-Reduce(intersect, PatientsPerOmic)
162
length(commonPatients)
163
XlistCommon<-lapply(Xlist, function(data) data[commonPatients,, drop=F])
164
165
#Take care of missing values (present in  genetic data)
166
ImputeByMean <- function(x) {x[is.na(x)] <- mean(x, na.rm=TRUE); return(x)}
167
168
#NAs in genetic
169
#remove feauters with less 90% completeness
170
RarlyMeasuredFeautres<-
171
  which(colSums(is.na(XlistCommon$gen))>0.1*nrow(XlistCommon$gen))
172
XlistCommon$gen<-XlistCommon$gen[,-RarlyMeasuredFeautres]
173
#remove patients with less than 90% of genetic feautres measured
174
IncompletePatients<-
175
  rownames(XlistCommon$gen)[
176
    (rowSums(is.na(XlistCommon$gen))>0.1*ncol(XlistCommon$gen))]
177
commonPatients<-commonPatients[!commonPatients %in% IncompletePatients]
178
XlistCommon<-lapply(XlistCommon, function(data) data[commonPatients,, drop=F])
179
#replace remaining NA by mean and round to 0 or 1
180
XlistCommon$gen<-round(apply(XlistCommon$gen, 2, ImputeByMean))
181
182
#NAs in methylation
183
#remove feauters with less 90% completeness
184
XlistCommon$meth<-
185
  XlistCommon$meth[,colSums(is.na(XlistCommon$meth))<0.1*nrow(methData)]
186
#impute remainin missing values by mean for each feautre across patients
187
XlistCommon$meth<-(apply(XlistCommon$meth, 2, ImputeByMean))
188
189
#final dimensions of the data
190
sapply(XlistCommon, dim)
191
```
192
193
194
Use top 20 PCs of methylation and expression as predictors.
195
```{r, fig.width=12, fig.height=10}
196
pcaMeth<-prcomp(XlistCommon$meth, center=T, scale. = F)
197
XlistCommon$MethPCs<-pcaMeth$x[,1:20]
198
colnames(XlistCommon$MethPCs)<-
199
  paste("meth",colnames(XlistCommon$MethPCs), sep="")
200
201
pcaExpr<-prcomp(XlistCommon$RNA, center=T, scale. = F)
202
XlistCommon$RNAPCs<-pcaExpr$x[,1:20]
203
colnames(XlistCommon$RNAPCs)<-paste("RNA",colnames(XlistCommon$RNAPCs), sep="")
204
```
205
206
207
Choose drug viabilites of interest as response variables.
208
```{r}
209
DOI <- c("D_006_1:5", "D_010_1:5", "D_159_1:5","D_002_4:5", "D_003_4:5",
210
         "D_012_4:5", "D_063_4:5", "D_166_4:5")
211
drugviab<-XlistCommon$drugs
212
drugviab<-drugviab[,DOI, drop=F]
213
colnames(drugviab) <- drugs[substr(colnames(drugviab),1,5),"name"]
214
```
215
216
217
Construct list of designs used and scale all predictors to mean zero and unit variance.
218
```{r}
219
ZPCs<-list(expression=XlistCommon$RNAPCs,
220
        genetic=XlistCommon$gen, 
221
        methylation= XlistCommon$MethPCs,
222
        demographics=XlistCommon$demographics, 
223
        IGHV=XlistCommon$IGHV,
224
        pretreated = XlistCommon$pretreated)
225
ZPCs$all<-do.call(cbind, ZPCs)
226
ZPCsunscaled<-ZPCs
227
ZPCsscaled<-lapply(ZPCs, scale)
228
lapply(ZPCsscaled, colnames)
229
```
230
231
Define colors.
232
```{r}
233
set1 <- brewer.pal(9,"Set1")
234
colMod<-c(paste(set1[c(4,1,5,3,2,7)],"88",sep=""), "grey")
235
names(colMod) <-
236
  c("demographics", "genetic", "IGHV","expression", "methylation", "pretreated",
237
    "all")
238
```
239
240
241
### Lasso using multi-omic data
242
243
Fit a linear model using Lasso explaining drug response by each one of the omic set separately as well as all together. As measure of explained variance use  R2 from linear models for unpenalized models (IGHV)
244
and fraction of variance explained, i.e. 1- cross-validated mean squared error/total sum of squares for others.
245
246
To ensure fair treatment of all features they are standardized to mean 0 and unit variance.
247
To study robustness the cross-validation is repeated 100-times to obtain the mean and standard deviation shown in the figure.
248
249
```{r, echo=F}
250
#Function to calculate Var Explained for Penalized Regression
251
R2ForPenRegPlusViz<-function(Z, drugviab, nfolds=10, alpha=1, nrep=100,
252
                      Parmfrow=c(2,4), ylimMax=0.4, standardize=TRUE){
253
Zlocal<-Z
254
255
set.seed(1030)
256
seeds<-sample(1:10000000, nrep)
257
258
RepeatedR2list<-lapply(1:nrep, function(outer){
259
#Use same folds for all omics to make comparable
260
set.seed(seeds[outer])
261
foldsAssignment<-sample(rep(seq(nfolds), length=nrow(drugviab)))
262
263
R2echOmicadj<-sapply(colnames(drugviab), function(dname) {
264
  d<-drugviab[,dname]
265
  sapply(names(Zlocal), function(nameZ) {
266
    pred<-Zlocal[[nameZ]]
267
    #fit a lasso model for omics with more than one features
268
    if(ncol(pred)>1){
269
    fitcv<-cv.glmnet(pred,d,family="gaussian", standardize=standardize,
270
                      alpha=alpha, foldid=foldsAssignment)
271
    R2 <- 1-min(fitcv$cvm)/fitcv$cvm[1]
272
    
273
    }else {#fit a liner model for single feautres (IGHV)
274
      fitlm<-lm(d~., data.frame(pred))
275
      R2<- summary(fitlm)$r.squared
276
    }
277
    R2
278
  })
279
}
280
  )
281
})
282
RepeatedR2<-RepeatedR2list
283
#calculate mean and sd across repitions wiht different folds
284
meanR2<-apply(simplify2array(RepeatedR2), 1:2, mean)
285
sdR2<-apply(simplify2array(RepeatedR2), 1:2, sd)
286
287
par(mfrow=Parmfrow, mar=c(7,5,3,3))
288
for (i in 1: ncol(meanR2)) {
289
    barc<-barplot(meanR2[,i], main= colnames(meanR2)[i], ylim=c(0,ylimMax),
290
                  las=2, col=colMod[rownames(meanR2)], ylab="R2")
291
    segments(barc, meanR2[,i]-sdR2[,i],barc, meanR2[,i]+sdR2[,i])
292
}
293
RepeatedR2
294
}
295
```
296
297
Fit model and show resulting omic-prediction profiles.
298
```{r lasso_main, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=9, fig.height=8}
299
#FIG# 5A
300
resultLassoPCA<-R2ForPenRegPlusViz(ZPCsscaled, drugviab, nfold=10, alpha=1,
301
                                   nrep=100, ylimMax=0.4)
302
303
df_resultLassoPCA <- melt(resultLassoPCA)
304
colnames(df_resultLassoPCA) <- c("omic", "drug", "R2", "run")
305
summaryR2 <- df_resultLassoPCA %>% group_by(omic, drug) %>% 
306
  dplyr::summarise(meanR2=mean(R2),sdR2 = sd(R2), nR2 = length(R2)) %>%
307
  mutate(seR2 = sdR2/sqrt(nR2))
308
309
ggplot(summaryR2, aes(x=omic, y=meanR2, fill=omic, group=omic))+
310
    geom_bar(stat = "identity") +  scale_fill_manual(values=colMod) + 
311
    geom_errorbar(aes(ymax = meanR2 + sdR2,ymin = meanR2 - sdR2), position = "dodge", width = 0.25) +facet_wrap(~drug, ncol=4) +theme_bw(base_size = 18) +ylab(bquote(R^2)) + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text = element_text(colour = "black")) +xlab("")+guides(fill=guide_legend(title="Data type")) 
312
```
313
314
315
Optionally, the model can also be fit for all drugs in the study.
316
```{r, eval=FALSE, fig.path='supp_'}
317
nfolds<-10
318
nrep<-100
319
320
DOI <-
321
  grepl("1:5",colnames(XlistCommon$drugs)) |
322
  grepl("4:5",colnames(XlistCommon$drugs))
323
drugviabAll<-XlistCommon$drugs
324
drugviabAll<-drugviabAll[,DOI]
325
colnames(drugviabAll) <- 
326
  paste0(drugs[substr(colnames(drugviabAll),1,5),"name"],
327
         substr(colnames(drugviabAll),6,9))
328
329
R2ForPenReg(Zscaled, drugviabAll, nfold=nfolds, alpha=1, nrep=nrep,
330
            Parmfrow=c(4,4), ylimMax=0.6)
331
```
332
333
334
## Lasso for drugs in a genetic-focussed model
335
336
We perform regression analysis by adaptive LASSO using genetic features, IGHV status (coded as 0 -1 for mutated/unmutated), pretreatment status (coded as 0 -1) and methylation cluster (coded as 0 for lowly-programmed (LP), 0.5 for intermediately-programmed (IP) and 1 for highly-programmed (HP)). For each model we select the optimal penalization parameter of the second step Lasso fit of the adaptive Lasso by repeated cross-validation to get robust results.
337
338
As output bar plots showing the coefficients of the selected predictors are produced.
339
340
341
### General definitions
342
343
We use the following abbreviations for the different data types.
344
345
|   | data type           |
346
|---|---------------------|
347
| M | methylation cluster |
348
| G | mutations           |
349
| V | viability           |
350
| I | ighv                |
351
| P | pretreatment        |
352
353
354
355
```{r echo=FALSE}
356
dataType = c(M="Methylation_Cluster", V="viab", G="gen", I="IGHV", P="pretreat")
357
```
358
359
Color definitions for the groups.
360
```{r echo=FALSE}
361
coldef<-list()
362
coldef["I"]<-brewer.pal(9, "Blues")[7]
363
coldef["M"]<-list(brewer.pal(9, "Blues")[c(1, 5, 9)])
364
coldef["G"]<-brewer.pal(8, "YlOrRd")[8]
365
coldef["P"]<-"chocolate4"
366
```
367
368
369
### Data pre-processing
370
371
Subselect CLL patients.
372
```{r}
373
lpdCLL = lpdAll[ , lpdAll$Diagnosis=="CLL"]
374
```
375
376
Prepare the data and limit the number of features by subselecting only those which include at least 5 recorded incidences. List the predictors.
377
```{r}
378
lpdCLL = lpdAll[ , lpdAll$Diagnosis=="CLL"]
379
lpdCLL = BloodCancerMultiOmics2017:::prepareLPD(lpd=lpdCLL, minNumSamplesPerGroup=5)
380
(predictorList = BloodCancerMultiOmics2017:::makeListOfPredictors(lpdCLL))
381
```
382
383
### Drug response prediction
384
385
The prediction will be made for the following drugs and concentrations.
386
387
|       | drug name     | conc |
388
|-------|---------------|------|
389
| D_159 | doxorubicine  | 1-5  |
390
| D_006 | fludarabine   | 1-5  |
391
| D_010 | nutlin-3      | 1-5  |
392
| D_166 | PRT062607 HCl | 4-5  |
393
| D_003 | idelalisib    | 4-5  |
394
| D_002 | ibrutinib     | 4-5  |
395
| D_012 | selumetinib   | 4-5  |
396
| D_063 | everolimus    | 4-5  |
397
398
399
400
```{r}
401
drs = list("1:5"=c("D_006", "D_010", "D_159"),
402
           "4:5"=c("D_002", "D_003", "D_012", "D_063", "D_166"))
403
```
404
405
```{r}
406
predvar = unlist(BloodCancerMultiOmics2017:::makePredictions(drs=drs,
407
                                 lpd=lpdCLL,
408
                                 predictorList=predictorList,
409
                                 lim=0.15, std=FALSE, adaLasso=TRUE,
410
                                 colors=coldef),
411
                 recursive=FALSE)
412
```
413
414
415
```{r echo=FALSE}
416
details = function(dr, what) {
417
  predvar[[dr]][["plot"]][[what]]
418
}
419
```
420
421
```{r prediction-D_006, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=details("D_006","width"), fig.height=details("D_006","height"), eval=TRUE}
422
#FIG# 5B
423
grid.draw(details("D_006","plot"))
424
```
425
426
```{r prediction-D_010, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=details("D_010","width"), fig.height=details("D_010","height"), eval=TRUE}
427
#FIG# 5B
428
grid.draw(details("D_010","plot"))
429
```
430
431
```{r prediction-D_159, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=details("D_159","width"), fig.height=details("D_159","height"), eval=TRUE}
432
#FIG# 5B
433
grid.draw(details("D_159","plot"))
434
```
435
436
```{r prediction-D_002, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=details("D_002","width"), fig.height=details("D_002","height"), eval=TRUE}
437
#FIG# 5B
438
grid.draw(details("D_002","plot"))
439
```
440
441
```{r prediction-D_003, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=details("D_003","width"), fig.height=details("D_003","height"), eval=TRUE}
442
#FIG# 5B
443
grid.draw(details("D_003","plot"))
444
```
445
446
```{r prediction-D_012, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=details("D_012","width"), fig.height=details("D_012","height"), eval=TRUE}
447
#FIG# 5B
448
grid.draw(details("D_012","plot"))
449
```
450
451
```{r prediction-D_063, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=details("D_063","width"), fig.height=details("D_063","height"), eval=TRUE}
452
#FIG# 5B
453
grid.draw(details("D_063","plot"))
454
```
455
456
```{r prediction-D_166, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=details("D_166","width"), fig.height=details("D_166","height"), eval=TRUE}
457
#FIG# 5B
458
grid.draw(details("D_166","plot"))
459
```
460
461
Plot the legends.
462
```{r}
463
legends = BloodCancerMultiOmics2017:::makeLegends(legendFor=c("G","I","M", "P"),
464
                                                  coldef)
465
```
466
467
```{r legend, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=legends[["width"]], fig.height=legends[["height"]]}
468
#FIG# 5B legend
469
grid.draw(legends[["plot"]])
470
```
471
472
Additionaly we plot the prediction for rotenone.
473
```{r}
474
drs_rot = list("4:5"=c("D_067"))
475
predvar_rot = unlist(BloodCancerMultiOmics2017:::makePredictions(drs=drs_rot,
476
                                 lpd=lpdCLL,
477
                                 predictorList=predictorList,
478
                                 lim=0.23, std=FALSE, adaLasso=TRUE,
479
                                 colors=coldef),
480
                 recursive=FALSE)
481
```
482
483
```{r prediction-D_067, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=predvar_rot[["D_067"]][["plot"]][["width"]], fig.height=predvar_rot[["D_067"]][["plot"]][["height"]], eval=TRUE}
484
#FIG# S26
485
grid.draw(predvar_rot[["D_067"]][["plot"]][["plot"]])
486
```
487
488
489
In a same way the prediction for all the drugs can be made.
490
```{r}
491
alldrs = unique(fData(lpdCLL)[fData(lpdCLL)$type=="viab","id"])
492
drs = list("1:5"=alldrs, "4:5"=alldrs)
493
```
494
495
```{r, eval=TRUE}
496
predvar2 = BloodCancerMultiOmics2017:::makePredictions(drs=drs,
497
                                        lpd=lpdCLL,
498
                                        predictorList=predictorList,
499
                                        lim=0.23,
500
                                        colors=coldef)
501
```
502
503
### Effect of pre-treatment
504
505
In order to find out the effect of pre-treatment on the predictions of drug response we provide the following overview. The summary is made for all drugs, separating however, on ranges of drug concentrations (mean drug effect of: all five (1-5) and two lowest (4-5) concentrations of the drugs).
506
507
```{r}
508
givePreatreatSum = function(predNum) {
509
  
510
  idx = sapply(predvar2[[predNum]], function(x) length(x)==1)
511
  predvar2[[predNum]] = predvar2[[predNum]][!idx]
512
  # get model coefficients and reshape
513
  coeffs <- do.call(cbind,lapply(predvar2[[predNum]], "[[", 'coeffs'))
514
  coeffs <- coeffs[-1,]
515
  coeffs <- as.matrix(coeffs)
516
  # colnames(coeffs) <- unlist(drs["1:5"])
517
  colnames(coeffs) = names(predvar2[[predNum]])
518
  colnames(coeffs) <- drugs[colnames(coeffs),"name"]
519
  coeffDF <- melt(as.matrix(coeffs))
520
  colnames(coeffDF) <- c("predictor", "drug", "beta")
521
  coeffDF$selected <- coeffDF$beta !=0
522
  
523
  #sort by times being selected
524
  coeffDF$predictor <- factor(coeffDF$predictor, level=)
525
  
526
  # number of drugs a predictor is chosen for
527
  gg1 <- coeffDF %>% group_by(predictor) %>% 
528
    dplyr::summarize(selectedSum = sum(selected)) %>%
529
    mutate(predictor = factor(predictor,
530
                              levels=predictor[order(selectedSum)])) %>%
531
    ggplot(aes(x=predictor, y=selectedSum)) + 
532
    geom_bar(stat="identity")+ylab("# drugs selected for") +
533
    coord_flip()
534
  
535
  # boxplots of non-zero coeffients
536
  orderMedian <- filter(coeffDF, selected) %>% group_by(predictor) %>% 
537
    dplyr::summarize(medianBeta = median(abs(beta)))
538
  coeffDF$predictor <- factor(
539
    coeffDF$predictor,
540
    levels=orderMedian$predictor[order(orderMedian$medianBeta)] )
541
  gg2 <- ggplot(filter(coeffDF, selected), aes(x=predictor, y=abs(beta))) +
542
    geom_boxplot() +
543
    coord_flip() + ggtitle("Distribution of non-zero coefficients")
544
  gridExtra::grid.arrange(gg1,gg2, ncol=1)
545
  
546
  # coefficeints per drug
547
  ggplot(filter(coeffDF, selected), 
548
         aes(x= drug, y=abs(beta), col= predictor=="Pretreatment")) +
549
    geom_point() +
550
    coord_flip()
551
  
552
  #drugs pretreatment is selected for
553
  as.character(filter(coeffDF, predictor=="Pretreatment" & beta!=0)$drug)
554
  PselDrugs <- as.character(
555
    filter(coeffDF, predictor=="Pretreatment" & beta!=0)$drug)
556
  length(PselDrugs)
557
  # length(drs[[1]])
558
}
559
```
560
561
__Conc. 1-5__
562
```{r pretreatment_c1-5, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=7, fig.height=10, eval=TRUE}
563
givePreatreatSum(predNum=1)
564
```
565
566
__Conc. 4-5__
567
```{r pretreatment_c4-5, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.width=7, fig.height=10, eval=TRUE}
568
givePreatreatSum(predNum=2)
569
```
570
571
572
573
```{r, include=!exists(".standalone"), eval=!exists(".standalone")}
574
sessionInfo()
575
```
576
577
```{r, message=FALSE, warning=FALSE, include=FALSE}
578
rm(list=ls())
579
```