a b/Analysis_3.R
1
library(tibble)
2
library(dplyr)
3
library(bmrm)
4
library(pROC)
5
library(glmnet)
6
library(xlsx)
7
library(dplyr)
8
#Functions
9
10
writeCSVMultiple <- function(x,file){
11
  if(dir.exists("file")!=F){
12
    cat("Creating folder",file,"\n")
13
    dir.create(file)
14
  }
15
  for(i in seq_along(x)){
16
    write.csv(x = x[[i]],
17
              file = paste(file, "_",names(x)[[i]],".csv",sep = ""))
18
  }
19
}
20
21
22
QFunclog <- function(x,Threshold) 
23
  ifelse(x>Threshold,log10(x+1),0)
24
QFuncjumpramp <- function(x,Threshold) 
25
  ifelse(x>Threshold,x,0)
26
27
28
printRemoveDropResults <- function(ThisResult,whichRound="Round 4"){
29
  cat("Sensitivity:",(mean(unlist(ThisResult$AllSensitivity))),"\n")
30
  
31
  print("Only this protein:")
32
  print((rownames(ThisResult$AllProbPredictionsMinusFeatureNIter) %>% 
33
           sapply(FUN = function(x) ThisResult$AllProbPredictionsMinusFeatureNIter[[x,"LR"]]%>% 
34
                    dplyr::select(-BV) %>%
35
                    summarise_all(.funs = funs(mean(.[which(CancerStatus==T)]>ThisResult$AllThresh99NIter[[x,"LR"]]))))) %>% 
36
          apply(MARGIN = c(1,2),as.numeric) %>% 
37
          rowMeans()%>% sort()%>% t()%>% t())
38
  
39
  
40
  print("Remove this protein:")
41
  print((rownames(ThisResult$AllProbPredictionsMinusFeatureNIter) %>% 
42
           sapply(FUN = function(x) ThisResult$AllProbPredictionsDropFeatureNIter[[x,"LR"]]%>% 
43
                    dplyr::select(-BV) %>%
44
                    summarise_all(.funs = funs(mean(.[which(CancerStatus==T)]>ThisResult$AllThresh99NIter[[x,"LR"]]))))) %>% 
45
          apply(MARGIN = c(1,2),as.numeric) %>% 
46
          rowMeans()%>%sort()%>%t()%>%t())
47
  
48
  
49
  mycoefs <- ThisResult$Alllogit %>% coef(s=0) %>% abs() 
50
  FirstIteration <- ThisResult$Master4ClassificationwithClassAll[whichRound,"LR"][[1]] 
51
  MinDiff <- FirstIteration  %>% 
52
    mutate_at(.vars = setdiff(colnames(FirstIteration),c("CancerStatus","maxOmega","fold")),
53
              .funs=funs(QFuncjumpramp(.,quantile(.[which(CancerStatus==T)],probs=0.95)))) %>% 
54
    summarise_at(.vars = setdiff(colnames(FirstIteration),
55
                                 c("CancerStatus","fold")),
56
                 .funs = funs(mean(.[which(CancerStatus==T)])-mean(.[which(CancerStatus==F)]))) %>%
57
    as.data.frame() 
58
  
59
  
60
  RankingCoefs <- data.frame('Ranking Score'= as.vector(MinDiff*mycoefs[names(MinDiff),1]) %>%
61
                               t(),
62
                             'Coefs' = mycoefs[names(MinDiff),1] %>% 
63
                               t()
64
                             %>%t())
65
  
66
  print(RankingCoefs%>% 
67
          rownames_to_column("Protein")%>%
68
          arrange(Ranking.Score))
69
  
70
  
71
}
72
73
ProbsEachRound <-  function(ThisResult,csvFile=NULL){
74
  Probs = ThisResult$AllProbPredictionsNIter[,"LR"] %>% sapply(FUN = function(x) x)
75
  Probs <- rbind(Probs,Thresh=ThisResult$AllThresh99NIter[,"LR"] %>% sapply(FUN = function(x) x))
76
  Probs <- cbind(Probs,
77
                 CancerStatus =
78
                   ThisResult$Master4ClassificationwithClassAll[["Round 1","LR"]][rownames(Probs),"CancerStatus"])
79
  
80
  if(!is.null(csvFile))
81
    write.csv(file = csvFile,x = Probs)
82
  return(Probs)
83
}
84
85
Generate4LudaObject <- function(ThisResult,rdaFile,TissueType=TissueType_RFData){
86
  ForLuda <- sapply(setNames(paste("Round",1:10),paste("Round",1:10)),
87
                     function(x) cbind(ThisResult$Master4ClassificationwithClassAll[[x,"LR"]],
88
                                       LogitCall=ThisResult$AllpredictionsNIter[[x,"LR"]],
89
                                       LogitProb=ThisResult$AllProbPredictionsNIter[[x,"LR"]],
90
                                       Tissue=TissueType_RFData[rownames(ThisResult$Master4ClassificationwithClassAll[[x,"LR"]])
91
                                         
92
                                       ]),simplify = F)
93
  ForLudaFeatureRanks <- ThisResult$AlllogitFeaturesRank
94
  ForLudaSensitivity <- ThisResult$AllSensitivity
95
  
96
  
97
  save(file=rdaFile,
98
       list= c("ForLuda","ForLudaFeatureRanks","ForLudaSensitivity","ThisResult"))
99
  
100
  return( list(ForLuda=ForLuda,
101
             ForLudaFeatureRanks=ForLudaFeatureRanks,
102
             ForLudaSensitivity=ForLudaSensitivity,
103
             ThisResult=ThisResult))
104
  
105
  
106
}
107
108
109
cv.CancervsNormal.QuantileReplace.Mutatation.knownCV <- function(CancerStatus,
110
                                                                 # a vector containing cancer status (T=Cancer)
111
                                                                 Proteins,
112
                                                                 # a matrix of protien vector
113
                                                                 Mutations,
114
                                                                 # a matrix of Mutations
115
                                                                 # It must at include BV.,
116
                                                                 # iteration , fold
117
                                                                 
118
                                                                 #Methods=c("RF","Logit"),
119
                                                                 # Methods to test
120
                                                                 ProteinsHigherCancer = T,
121
                                                                 #If true, we only keep proteins with higher median in cancer in the sense of wilcoxon
122
                                                                 ForcePositiveCoeff = F,
123
                                                                 #If true, we force the coefficients to be positive (This is togher measure than ProteinsHigherCancer)
124
                                                                 #myseed=1234,
125
                                                                 #NIter=10,
126
                                                                 #nfold=10,
127
                                                                 SpecSet = 0.99, #Specificity we shoot for
128
                                                                 FeaturesToQuantile, # Features to quantile, set to Null if you do not wish quantiling
129
                                                                 QuantileF=0.95,
130
                                                                 QuantileFunction= function(x,Threshold) 
131
                                                                   ifelse(x>Threshold,x,0),
132
                                                                 NoPenaltyonCtdna=F,
133
                                                                 NoLassoPenaly = F){
134
  
135
  
136
  
137
  mylower <- ifelse(ForcePositiveCoeff,0,-Inf)
138
  if(ProteinsHigherCancer){
139
    #apply Wilcoxon to filter proteins if ProteinsHigherCancer==T
140
    
141
    Proteins2Use <- (apply(Proteins,MARGIN = 2,
142
                           FUN = function(x) wilcox.test(
143
                             x[which(CancerStatus==T)],
144
                             x[which(CancerStatus==F)],
145
                             alternative = "greater")$p.value)<0.05) %>%
146
      which() %>% 
147
      names()
148
    
149
    
150
    # Proteins2Use <- (colMeans(Proteins[which(CancerStatus==T),,drop=F])- 
151
    #                    colMeans(Proteins[which(CancerStatus==F),,drop=F])>0) %>% 
152
    #   which() %>% 
153
    #   names()
154
    
155
  }else{
156
    Proteins2Use <- colnames(Proteins)
157
  }
158
  
159
  
160
  # if(is.null(Mutations)){
161
  #   Master4ClassificationwithClass <- data.frame(CancerStatus=CancerStatus,
162
  #                                                Proteins[,Proteins2Use,drop=F])
163
  # }else{
164
  
165
  
166
  #}
167
  
168
  #Remove the penalty for ctdna if NoPenaltyonCtdna==T
169
  if(NoPenaltyonCtdna){
170
    PenaltyFactor <- c(rep(1,length(Proteins2Use)),rep(0,ncol(Mutations)-3))
171
  }else{
172
    PenaltyFactor <-c(rep(1,length(Proteins2Use)),rep(1,ncol(Mutations)-3))
173
  }
174
  
175
  #Only FeaturesToQuantile which have survived the Wilcoxon are being quantiles
176
  if(!is.null(FeaturesToQuantile))
177
    FeaturesToQuantile <- intersect(FeaturesToQuantile,Proteins2Use)
178
  
179
  #RFFeatures <- colnames(Master4ClassificationwithClass) %>% setdiff("CancerStatus")
180
  
181
  
182
  #augmenting the quantiled and non-quantiled proteins
183
  MutationsRound1st <- Mutations %>% 
184
    filter(iteration==1) %>%
185
    column_to_rownames("BV") 
186
  MutationsRound1st <- MutationsRound1st[rownames(Proteins),]
187
  MutationsRound1st[is.na(MutationsRound1st)] <- 0
188
  
189
  Master4ClassificationwithClass <- data.frame(CancerStatus=CancerStatus,
190
                                               Proteins[,Proteins2Use,drop=F],
191
                                               MutationsRound1st %>% 
192
                                                 dplyr::select(-iteration,-fold))
193
  if(is.null(FeaturesToQuantile)){
194
    Master4ClassificationwithClassAppQuantAug <-
195
      Master4ClassificationwithClass
196
  }else{
197
    
198
    
199
    ThrehsoldsTraining <- Master4ClassificationwithClass %>% 
200
      filter(CancerStatus==F) %>%
201
      summarise_at(.vars = FeaturesToQuantile,# to implement ramp you can manipulate here
202
                   .funs = funs(quantile(.,probs = QuantileF))) 
203
    
204
    Master4ClassificationwithClassAppQuantAug <- # to implement ramp you can manipulate here
205
      Master4ClassificationwithClass %>% 
206
      mutate_(.dots = setNames(paste("QuantileFunction(",
207
                                     names(ThrehsoldsTraining),
208
                                     ",",
209
                                     ThrehsoldsTraining,")",
210
                                     sep = ""),
211
                               names(ThrehsoldsTraining)))
212
    
213
  } 
214
  
215
  #Do cross-validation without quantiling for the Logit
216
  Alllogit <- cv.glmnet(x= Master4ClassificationwithClassAppQuantAug %>% 
217
                          dplyr::select(-CancerStatus) %>%
218
                          as.matrix(),
219
                        y=Master4ClassificationwithClass$CancerStatus,
220
                        type.measure = "auc",
221
                        family="binomial",
222
                        nfolds = length(unique(MutationsRound1st$fold)),
223
                        penalty.factor = PenaltyFactor,
224
                        lower.limits=mylower,
225
                        keep=TRUE)
226
  AllLogitThreshod <- apply(Alllogit$fit.preval[which(Master4ClassificationwithClass$CancerStatus==F),],
227
                            MARGIN = 2,
228
                            FUN = quantile,probs=SpecSet,na.rm=T) %>% 
229
    as.numeric() ## to implement ramp you can manipulate here
230
  
231
  AllSensity <- sapply(seq_along(AllLogitThreshod),
232
                       FUN = function(x) 
233
                         mean(Alllogit$fit.preval[ which(Master4ClassificationwithClass$CancerStatus==T),x]
234
                              >AllLogitThreshod[x]))
235
  #find the best lambda suited for desired SpecSet
236
  if(NoLassoPenaly){
237
    lambda.best <- 0
238
  }else{
239
    lambda.best <-  Alllogit$lambda[which.max(AllSensity)]
240
  }
241
  AlllogitFeatures <- which(as.matrix(coef(Alllogit,s=lambda.best))!=0,
242
                            arr.ind = T,
243
                            useNames = T) %>% 
244
    rownames() %>% 
245
    setdiff("(Intercept)")
246
  
247
  NIter <- unique(Mutations$iteration)
248
  
249
  AllProbPredictionsNIter <- matrix(data = list(),
250
                                    ncol = 1,
251
                                    nrow = length(NIter),
252
                                    dimnames = list(paste("Round",NIter),
253
                                                    "LR"))
254
  AllThresh99NIter <- AllProbPredictionsNIter
255
  
256
  AllpredictionsNIter <- AllProbPredictionsNIter
257
  AllProbPredictionsNIter <- AllProbPredictionsNIter
258
  AllSensitivity <- AllProbPredictionsNIter
259
  Master4ClassificationwithClassAll <- AllProbPredictionsNIter
260
  AllProbPredictionsMinusFeatureNIter <- AllProbPredictionsNIter
261
  AllProbPredictionsDropFeatureNIter <- AllProbPredictionsNIter
262
  #set.seed(myseed)
263
  #AllMyIndices <- sapply(1:NIter,FUN = function(x)
264
  #  balanced.cv.fold(Master4ClassificationwithClass$CancerStatus) %>% as.numeric,simplify = F) 
265
  
266
  #names(AllMyIndices)<- paste("Round",NIter,sep = "")
267
  
268
  set.seed(1234)
269
  for( j in NIter){
270
    #MyIndices <- AllMyIndices[[j]]
271
    ThisRound <-  Mutations %>%
272
      filter(iteration==j) 
273
    
274
    uniqueMyIndices <- ThisRound %>% 
275
      dplyr::select(fold) %>% 
276
      unlist() %>% 
277
      unique()
278
    
279
    Allpredictions <- setNames(vector(length = nrow(Proteins)),rownames(Proteins))
280
    ProbPredictions <- setNames(vector(length = nrow(Proteins),mode = "numeric") ,rownames(Proteins))
281
    ProbPredictionsMinusFeature <- matrix(0,
282
                                          nrow=nrow(Proteins),
283
                                          ncol= ncol(Master4ClassificationwithClass)-1 ,
284
                                          dimnames = list(rownames(Proteins),Master4ClassificationwithClass %>% colnames() %>% setdiff("CancerStatus"))
285
                                          
286
                                          
287
    )
288
    ProbPredictionsDropFeature <- ProbPredictionsMinusFeature
289
    #for(k in seq_along(Methods)){
290
    MutationsThisRound <- ThisRound %>%
291
      column_to_rownames("BV")
292
    MutationsThisRound <- MutationsThisRound[rownames(Proteins),]
293
    MutationsThisRound[is.na(MutationsThisRound)] <- 0
294
    
295
    Master4ClassificationwithClass <- data.frame(CancerStatus=CancerStatus,
296
                                                 Proteins[,Proteins2Use,drop=F],
297
                                                 MutationsThisRound %>% 
298
                                                   dplyr::select(-iteration,-fold))
299
    for( i in uniqueMyIndices){
300
      trainSamples <-  ThisRound %>% #which(MyIndices!=i)
301
        filter(fold!=i) %>%
302
        dplyr::select(BV) %>%
303
        unlist()
304
      testSamples <- ThisRound %>% #which(MyIndices!=i)
305
        filter(fold==i) %>%
306
        dplyr::select(BV) %>%
307
        unlist()
308
      
309
      
310
      if(is.null(FeaturesToQuantile)){
311
        Master4ClassificationwithClassAppQuantAug <-
312
          Master4ClassificationwithClass
313
      }else{
314
        
315
        ThrehsoldsTraining <- Master4ClassificationwithClass[trainSamples,] %>% 
316
          filter(CancerStatus==F) %>%
317
          summarise_at(.vars = FeaturesToQuantile,# to implement ramp you can manipulate here
318
                       .funs = funs(quantile(.,probs = QuantileF))) 
319
        
320
        Master4ClassificationwithClassAppQuantAug <- # to implement ramp you can manipulate here
321
          Master4ClassificationwithClass %>% 
322
          mutate_(.dots = setNames(paste("QuantileFunction(",
323
                                         names(ThrehsoldsTraining),
324
                                         ",",
325
                                         ThrehsoldsTraining,")",
326
                                         sep = ""),
327
                                   names(ThrehsoldsTraining)))
328
        
329
        rownames(Master4ClassificationwithClassAppQuantAug) <- rownames(Master4ClassificationwithClass)
330
      }
331
      #if(Methods[k]=="Logit"){
332
      
333
      thislogit <- glmnet(x= Master4ClassificationwithClassAppQuantAug[trainSamples,] %>% 
334
                            dplyr::select(-CancerStatus) %>% 
335
                            as.matrix(),
336
                          y=Master4ClassificationwithClassAppQuantAug[trainSamples,"CancerStatus"],
337
                          #lambda = lambda.best,
338
                          penalty.factor = PenaltyFactor,
339
                          family="binomial",
340
                          lower.limits=mylower)
341
      ProbPredictions[testSamples] <- predict(thislogit,
342
                                              Master4ClassificationwithClassAppQuantAug[testSamples,]%>%
343
                                                dplyr::select(-CancerStatus) %>% 
344
                                                as.matrix(),
345
                                              type="response",
346
                                              s=lambda.best) %>% 
347
        unlist()
348
      
349
      for( k in Master4ClassificationwithClassAppQuantAug%>% colnames() %>% setdiff("CancerStatus")){
350
        z <- Master4ClassificationwithClassAppQuantAug[testSamples,] %>%
351
          dplyr::select(-CancerStatus) %>%
352
          as.matrix()
353
        z[,setdiff(colnames(z),k)] <- 0
354
        
355
        ProbPredictionsMinusFeature[testSamples,k]<- predict(thislogit,
356
                                                             z,
357
                                                             type="response",
358
                                                             s=lambda.best) %>% 
359
          unlist()
360
        
361
        z <- Master4ClassificationwithClassAppQuantAug[testSamples,] %>%
362
          dplyr::select(-CancerStatus) %>%
363
          as.matrix()
364
        z[,k] <- 0
365
        
366
        ProbPredictionsDropFeature[testSamples,k]<- predict(thislogit,
367
                                                            z,
368
                                                            type="response",
369
                                                            s=lambda.best) %>% 
370
          unlist()
371
      }
372
      #}else if(Methods[k]=="RF"){
373
      
374
      
375
      
376
      
377
      #  ThisRF <- randomForest(as.factor(CancerStatus)~.,
378
      #                         data =Master4ClassificationwithClassAppQuantAug[,c("CancerStatus",RFFeatures)] ,
379
      #                         subset=trainSamples,
380
      #                         cutoff = c(0.15,0.85))
381
      #  ProbPredictions[testSamples] <- predict(ThisRF,Master4ClassificationwithClassAppQuantAug[testSamples,],type="prob")[,"TRUE"]
382
      #}
383
    }
384
    
385
    ThisThresh99 <- ProbPredictions[which(Master4ClassificationwithClass$CancerStatus==F)] %>%
386
      quantile(SpecSet)
387
    Allpredictions <- ProbPredictions>ThisThresh99
388
    AllThresh99NIter[[j]] <- ThisThresh99
389
    AllpredictionsNIter[[j]] <- Allpredictions
390
    AllProbPredictionsNIter[[j]] <- ProbPredictions
391
    AllProbPredictionsMinusFeatureNIter[[j]] <- as.data.frame(ProbPredictionsMinusFeature) %>%
392
      rownames_to_column("BV") %>%
393
      mutate( CancerStatus =
394
                Master4ClassificationwithClass[rownames(ProbPredictionsMinusFeature),
395
                                               "CancerStatus"]) 
396
    AllProbPredictionsDropFeatureNIter[[j]] <- as.data.frame(ProbPredictionsDropFeature) %>%
397
      rownames_to_column("BV") %>%
398
      mutate( CancerStatus =
399
                Master4ClassificationwithClass[rownames(ProbPredictionsMinusFeature),
400
                                               "CancerStatus"])
401
    AllSensitivity[[j]] <- Allpredictions[which(Master4ClassificationwithClass$CancerStatus==T)]  %>% 
402
      mean()
403
    
404
    Master4ClassificationwithClassAll[[j]] <- cbind(Master4ClassificationwithClass,
405
                                                    fold=
406
                                                      MutationsThisRound[rownames(Master4ClassificationwithClass),"fold"])
407
    
408
    #} 
409
    
410
    
411
  }
412
  
413
  return(list(Master4ClassificationwithClassAll=Master4ClassificationwithClassAll,
414
              AllThresh99NIter=AllThresh99NIter,
415
              AllpredictionsNIter=AllpredictionsNIter,
416
              AllProbPredictionsNIter=AllProbPredictionsNIter,
417
              AllProbPredictionsMinusFeatureNIter =AllProbPredictionsMinusFeatureNIter,
418
              AllProbPredictionsDropFeatureNIter = AllProbPredictionsDropFeatureNIter,
419
              AllSensitivity=AllSensitivity,
420
              AllSensitivitycvglmnet=max(AllSensity,na.rm = T), # This comes out of Logit cv and more reliable
421
              AllcvglmnetCalls = Alllogit$fit.preval[,which.max(AllSensity)],
422
              ProteinsHigherCancer=ProteinsHigherCancer,
423
              NoMutations=is.null(Mutations),
424
              Proteins2Use = Proteins2Use,
425
              Alllogit=Alllogit,
426
              AlllogitFeatures=AlllogitFeatures,
427
              #RFFeatures=RFFeatures,
428
              lambda.best=lambda.best))
429
  
430
}
431
432
433
434
#Preparing Data
435
436
Master_Protein_Dataset_12_9_17_Logist <- read.xlsx2("MEGA Protein Dataset for Logistic Regression, December 9, 2017.xlsx",
437
                                                    sheetIndex = 1,
438
                                                    stringsAsFactors = F)
439
440
441
Master_Protein_Dataset_12_9_17_RF <- read.xlsx2("MEGA Protein Dataset for Random Forest, December 9, 2017.xlsx",
442
                                                sheetIndex = 1,
443
                                                stringsAsFactors = F)
444
445
Master_Protein_Filtered_RF <-
446
  #Master_Protein_Dataset_20_10_17 %>%
447
  Master_Protein_Dataset_12_9_17_RF %>%
448
  #filter(!(BV. %in% PostSample)) %>%
449
  #Master_Protein_Dataset_9_16_17 %>%
450
  #filter(BV. %in% Master_Protein_Dataset_1_10_17$BV.)%>%
451
  dplyr::select(-`LRG.1`,-Vitronectin)
452
453
454
TissueType_RFData <- setNames(Master_Protein_Filtered_RF$Sample.Type,Master_Protein_Filtered_RF$BV.)
455
# ajdusting for limits of detection for each protein?
456
OnlyProteins_RF <- Master_Protein_Filtered_RF %>%
457
458
  dplyr::select(-BV.,-Stage,-Sample.Type) %>% 
459
  apply(MARGIN = 2,FUN = as.numeric) %>% 
460
  as.data.frame()
461
462
463
OnlyProteins_RF[is.na(OnlyProteins_RF)] <- 0
464
465
colnames(OnlyProteins_RF) <- gsub(colnames(OnlyProteins_RF),pattern = "[.]",replacement = "")
466
rownames(OnlyProteins_RF) <- Master_Protein_Filtered_RF$BV.
467
468
469
Master_Protein_Filtered_LR <-
470
  #Master_Protein_Dataset_20_10_17 %>%
471
  Master_Protein_Dataset_12_9_17_Logist %>%
472
  #filter(!(BV. %in% PostSample)) %>%
473
  #Master_Protein_Dataset_9_16_17 %>%
474
  #filter(BV. %in% Master_Protein_Dataset_1_10_17$BV.)%>%
475
  dplyr::select(-`LRG.1`,-Vitronectin)
476
477
# ajdusting for limits of detection for each protein?
478
OnlyProteins_LR <- Master_Protein_Filtered_LR %>%
479
  dplyr::select(-BV.,-Stage,-Sample.Type) %>% 
480
  apply(MARGIN = 2,FUN = as.numeric) %>% 
481
  as.data.frame()
482
483
OnlyProteins_LR[is.na(OnlyProteins_LR)] <- 0
484
485
colnames(OnlyProteins_LR) <- gsub(colnames(OnlyProteins_LR),pattern = "[.]",replacement = "")
486
rownames(OnlyProteins_LR) <- Master_Protein_Filtered_LR$BV.
487
488
489
490
## CtdNA
491
492
load("maxValuesPerSample_20171209_FORJosh.rda")
493
sampleTable_ALLModified <- 
494
  sampleTable_ALL[,c("iteration","fold","maxOmega","CosmicCount","Sample.Category")] %>% 
495
  rownames_to_column("BV") %>% 
496
  mutate(BV=ifelse(iteration==1,BV,substr(BV,1,nchar(BV)-1))) 
497
498
NewCtDNA_ALLRounds <- sampleTable_ALLModified %>% 
499
  filter(BV %in% Master_Protein_Filtered_LR$BV.)%>% 
500
  transmute(BV=BV,
501
            iteration=iteration,
502
            fold=fold,
503
            maxOmega= maxOmega#,maxOmegaThresh),
504
            #CosmicCount=CosmicCount,
505
            #maxOmegaCount= QFunclog(CosmicCount*maxOmega,maxOmegaCountThresh)
506
  ) 
507
508
NoNewCtDNA <- setdiff(Master_Protein_Filtered_LR$BV.,NewCtDNA_ALLRounds$BV)
509
NoNewCtDNA_ALLRounds <- bind_rows(sapply(unique(NewCtDNA_ALLRounds$iteration),
510
                                         FUN = function(x) 
511
                                           data.frame(BV=NoNewCtDNA,
512
                                                      iteration=x,
513
                                                      fold=as.numeric(balanced.cv.fold(NoNewCtDNA,
514
                                                                                       length(unique(NewCtDNA_ALLRounds$fold)))),
515
                                                      maxOmega=0),
516
                                         simplify = F) )
517
518
NewCtDNA_ALLRounds_Modified <- rbind(NewCtDNA_ALLRounds,NoNewCtDNA_ALLRounds)
519
520
521
522
### RF Analysis#########
523
#### with CtDNA
524
525
OnlyTenProteinsNoCosmicRemovedNewTemplatesRFData <-cv.CancervsNormal.QuantileReplace.Mutatation.knownCV(
526
  CancerStatus = 
527
    1*(Master_Protein_Filtered_RF$Sample.Type !="Normal"),
528
  Proteins = 
529
    OnlyProteins_RF %>%
530
    dplyr::select(
531
      CA199,
532
      CEA,
533
      CA125,
534
      #AFP,
535
      Prolactin,
536
      HGF,
537
      OPN   ,
538
      TIMP1,
539
      #Follistatin,
540
      #GCSF,
541
      #HE4,
542
      #CA153,
543
      #IL6,
544
      #Midkine,
545
      Myeloperoxidase#,
546
      #CYFRA211,
547
      #Galectin3#,
548
      #Thrombospondin2
549
    )   %>%
550
    as.matrix() ,
551
  
552
  ProteinsHigherCancer = T,
553
  NewCtDNA_ALLRounds_Modified %>% 
554
    mutate(maxOmega=QFuncjumpramp(maxOmega,0)
555
           #,
556
           #maxOmegaCount=QFunclog(maxOmegaCount,Inf) 
557
    ),
558
  FeaturesToQuantile = 
559
    c("CA199",
560
      "CEA",
561
      "CA125",
562
      "Prolactin",
563
      "HGF",
564
      "OPN" ,
565
      "TIMP1",
566
      #"IL6",
567
      #"Midkine",
568
      "Myeloperoxidase"),
569
  QuantileFunction = function(x,Threshold) 
570
    ifelse(x>Threshold,x,0),
571
  QuantileF = 0.95,
572
  ForcePositiveCoeff = F,
573
  NoLassoPenaly = T )
574
575
576
### Removing Protein Results
577
mainPath <- "Results/"
578
dir.create(mainPath,recursive = T)
579
580
sink(paste(mainPath,"ResultSummary.txt",sep = ""))
581
print("---------RF Data Results with 8 Proteins---------")
582
printRemoveDropResults(ThisResult = OnlyTenProteinsNoCosmicRemovedNewTemplatesRFData)
583
584
ProbsRFData <- ProbsEachRound(OnlyTenProteinsNoCosmicRemovedNewTemplatesRFData,
585
                              csvFile = paste(mainPath,"RFDatawithCtdna8proteins.csv",sep = "") )
586
587
LudaObject <- Generate4LudaObject(ThisResult = OnlyTenProteinsNoCosmicRemovedNewTemplatesRFData,
588
                    rdaFile = paste(mainPath,"ForLuda8proteins.rda",sep = "") )
589
590
591
592
593
#Print scores
594
### Save Logit Scores
595
dir.create(paste(mainPath,"OnlyThisProtein/",sep = ""),recursive = T)
596
writeCSVMultiple(rownames(OnlyTenProteinsNoCosmicRemovedNewTemplatesRFData$AllProbPredictionsMinusFeatureNIter) %>% 
597
                   sapply(FUN = function(x) OnlyTenProteinsNoCosmicRemovedNewTemplatesRFData$AllProbPredictionsMinusFeatureNIter[[x,"LR"]]%>% 
598
                            mutate(Threshold=OnlyTenProteinsNoCosmicRemovedNewTemplatesRFData$AllThresh99NIter[[x,"LR"]]),simplify = F),
599
                 file = paste(mainPath,"OnlyThisProtein/",sep = ""))
600
601
### Save Logit Scores
602
dir.create(paste(mainPath,"RemoveThisPrtoein/",sep = ""),recursive = T)
603
604
writeCSVMultiple(rownames(OnlyTenProteinsNoCosmicRemovedNewTemplatesRFData$AllProbPredictionsDropFeatureNIter) %>% 
605
                   sapply(FUN = function(x) OnlyTenProteinsNoCosmicRemovedNewTemplatesRFData$AllProbPredictionsDropFeatureNIter[[x,"LR"]]%>% 
606
                            mutate(Threshold=OnlyTenProteinsNoCosmicRemovedNewTemplatesRFData$AllThresh99NIter[[x,"LR"]]),simplify = F),
607
                 file = paste(mainPath,"RemoveThisPrtoein/",sep = ""))
608
609
610
### without CtdNA
611
612
613
OnlyTenProteinsNoCosmicRemovedNewTemplatesRFDatawithoutCtDNA <-cv.CancervsNormal.QuantileReplace.Mutatation.knownCV(
614
  CancerStatus = 
615
    1*(Master_Protein_Filtered_RF$Sample.Type !="Normal"),
616
  Proteins = 
617
    OnlyProteins_RF %>%
618
    dplyr::select(
619
      CA199,
620
      CEA,
621
      CA125,
622
      #AFP,
623
      Prolactin,
624
      HGF,
625
      OPN   ,
626
      TIMP1,
627
      #Follistatin,
628
      #GCSF,
629
      #HE4,
630
      #CA153,
631
      #IL6,
632
      #Midkine,
633
      Myeloperoxidase#,
634
      #CYFRA211,
635
      #Galectin3#,
636
      #Thrombospondin2
637
    )   %>%
638
    as.matrix() ,
639
  
640
  ProteinsHigherCancer = T,
641
  NewCtDNA_ALLRounds_Modified %>% 
642
    mutate(maxOmega=QFuncjumpramp(maxOmega,Inf)
643
           #,
644
           #maxOmegaCount=QFunclog(maxOmegaCount,Inf) 
645
    ),
646
  FeaturesToQuantile = 
647
    c("CA199",
648
      "CEA",
649
      "CA125",
650
      "Prolactin",
651
      "HGF",
652
      "OPN" ,
653
      "TIMP1",
654
      #"IL6",
655
      #"Midkine",
656
      "Myeloperoxidase"),
657
  QuantileFunction = function(x,Threshold) 
658
    ifelse(x>Threshold,x,0),
659
  QuantileF = 0.95,
660
  ForcePositiveCoeff = F,
661
  NoLassoPenaly = T )
662
663
664
printRemoveDropResults(ThisResult = OnlyTenProteinsNoCosmicRemovedNewTemplatesRFDatawithoutCtDNA)
665
666
ProbsRFData <- ProbsEachRound(OnlyTenProteinsNoCosmicRemovedNewTemplatesRFDatawithoutCtDNA,
667
                              csvFile = paste(mainPath,"RFDatawithCtdna8proteinswithoutCtdna.csv",sep = "") )
668
669
670
671
672
### Removing Each Protein at a time
673
674
EightProteins <- c("CA199",
675
                   "CEA",
676
                   "CA125",
677
                   "Prolactin",
678
                   "HGF",
679
                   "OPN"    ,
680
                   "TIMP1",
681
                   "Myeloperoxidase")
682
683
SensOnceEightProteins <- setNames(rep(0,length(EightProteins)),EightProteins)
684
685
for(i in seq_along(EightProteins)){
686
  
687
  EightProteinsRemoved <- setdiff(EightProteins,EightProteins[i])
688
  
689
  OnlyTenProteinsNoCosmicRemovedNewTemplatesRFDataRemoveOneProtein <-
690
    cv.CancervsNormal.QuantileReplace.Mutatation.knownCV(
691
    CancerStatus = 
692
      1*(Master_Protein_Filtered_RF$Sample.Type !="Normal"),
693
    Proteins = 
694
      OnlyProteins_RF[, EightProteinsRemoved] %>%
695
      as.matrix() ,
696
    
697
    ProteinsHigherCancer = T,
698
    NewCtDNA_ALLRounds_Modified %>% 
699
      mutate(maxOmega=QFuncjumpramp(maxOmega,0)
700
             #,
701
             #maxOmegaCount=QFunclog(maxOmegaCount,Inf) 
702
      ),
703
    FeaturesToQuantile = 
704
      EightProteinsRemoved,
705
    QuantileFunction = function(x,Threshold) 
706
      ifelse(x>Threshold,x,0),
707
    QuantileF = 0.95,
708
    ForcePositiveCoeff = F,
709
    NoLassoPenaly = T )
710
  
711
  ProbsRFData <- ProbsEachRound(OnlyTenProteinsNoCosmicRemovedNewTemplatesRFDataRemoveOneProtein,
712
                                csvFile = paste(mainPath,"RFDatawithCtdna8proteinswithout",EightProteins[i], ".csv",sep = "") )
713
714
    
715
    SensOnceEightProteins[i] <- OnlyTenProteinsNoCosmicRemovedNewTemplatesRFDataRemoveOneProtein$AllSensitivity %>% 
716
                                                                                        unlist() %>% 
717
                                                                                          mean()
718
      
719
}
720
721
cat("--- Remove Protein------\n")
722
print(t(t(SensOnceEightProteins)))
723
724
sink()