a b/Scripts/MakeKM.R
1
MakeKM <- function(data,average.risk,topHits,LT,numGroups,cuts,geneList){
2
  
3
  data$lvl4Groups <- rep(NA,nrow(data))
4
  qts <- as.numeric(quantile(average.risk,cuts))
5
  
6
  if(numGroups ==2){
7
    for(i in 1:nrow(data)){
8
      
9
      # for 2 level groups
10
      if(average.risk[i] < qts) {
11
        data$lvl4Groups[i] <- "Low"
12
      }
13
      
14
      if(average.risk[i] >= qts) {
15
        data$lvl4Groups[i] <- "High"
16
      }
17
    }
18
  }
19
  
20
  if(numGroups == 3){
21
    
22
    for(i in 1:nrow(data)){
23
      
24
      # for 3 level groups
25
      if(average.risk[i] < qts[1]) {
26
        data$lvl4Groups[i] <- "Low"
27
      }
28
      if(average.risk[i] >= qts[1] && average.risk[i] < qts[2]){
29
        data$lvl4Groups[i] <- "Intermediate"
30
      }
31
      if(average.risk[i] >= qts[2]) {
32
        data$lvl4Groups[i] <- "High"
33
      }
34
    }
35
  }
36
  
37
  if(numGroups == 4){
38
    #qts <- quantile(average.risk,probs = c(0.25,0.75,0.9))
39
    for(i in 1:nrow(data)){
40
      
41
      # for 4 level groups
42
      if(average.risk[i] < qts[1]) {
43
        data$lvl4Groups[i] <- "Low"
44
      }
45
      if(average.risk[i] >= qts[1] && average.risk[i] < qts[2]){
46
        data$lvl4Groups[i] <- "Low-intermediate"
47
      }
48
      if(average.risk[i] >= qts[2] && average.risk[i] < qts[3]){
49
        data$lvl4Groups[i] <- "High-intermediate"
50
      }
51
      if(average.risk[i] >= qts[3]) {
52
        data$lvl4Groups[i] <- "High"
53
      }
54
    }
55
  }
56
  
57
  
58
  count.dups <- function(DF){
59
60
    DT <- data.table(DF)
61
    DT[,.N, by = names(DT)]
62
  }
63
64
  ####
65
  if(length(geneList) == 0) {useGenes <- topHits[1:5]}
66
  else{
67
    geneList <- gsub(" ","",geneList)
68
    useGenes <- geneList}
69
  pie.data <- data[,match(useGenes,colnames(data))]
70
  Groups <- as.character(data[,match("lvl4Groups",colnames(data))])
71
  profile <- apply(pie.data,1,function(x){
72
    return(paste0(paste0(names(x),"=",as.numeric(x),collapse =",")))
73
  })
74
  make.pie.data <- as.data.frame(cbind(Groups,profile))
75
  
76
  ## Save top 2 scenarios for KM
77
  top.scenarios <- as.data.frame(matrix(nrow = 8,ncol = 5))
78
  colnames(top.scenarios) <- useGenes
79
  start <-1
80
  
81
  # high
82
  high.pie <- try(filter(make.pie.data,Groups == "High"))
83
  profiles.high <- cbind(count.dups(high.pie)[order(-count.dups(high.pie)$N),],
84
                         paste("Profile",1:nrow(count.dups(high.pie))))
85
  colnames(profiles.high) <- c("Groups","profile","N","Profile")
86
  scenarios <- lapply(1:nrow(profiles.high),function(x){
87
    geneList <- as.numeric(gsub(".*=","",unlist(strsplit(as.character(profiles.high$profile[x]),split=","))))
88
  })
89
  profiles.high$profile <- unlist(lapply(1:nrow(profiles.high),function(x){
90
    temp <- unlist(strsplit(as.character(profiles.high$profile[x]),split = ","))
91
    prof.temp <- temp[grep("=1",temp)]
92
    prof <- toString(gsub("=1","",prof.temp))
93
  }) )
94
  scenarios <- as.data.frame(do.call(rbind, scenarios))
95
  colnames(scenarios) <- useGenes
96
  #Prob3YSurvival <- as.numeric(summary(survfit(fit.topHits, newdata = scenarios, se.fit = F, conf.int = F), times = (3*MD))$surv)
97
  #Prob5YSurvival <- as.numeric(summary(survfit(fit.topHits, newdata = scenarios, se.fit = F, conf.int = F), times = (5*MD))$surv)
98
  #profiles.high <- profiles.high #cbind(profiles.high,Prob3YSurvival,Prob5YSurvival)
99
  profiles.high$profile[which(profiles.high$profile =="")] <- "No mutants"
100
  top.scenarios[start:(start+2),] <- scenarios[1:3,]
101
  rownames(top.scenarios)[start:(start+2)] <- c("High : Profile 1","High : Profile 2","High : Profile 3")
102
  start <- start+3
103
  ## Inter high
104
  if(numGroups==4){
105
    high.inter.pie <- try(filter(make.pie.data,Groups == "High-intermediate"))
106
    profiles.inter.high <- cbind(count.dups(high.inter.pie)[order(-count.dups(high.inter.pie)$N),],
107
                                 paste("Profile",1:nrow(count.dups(high.inter.pie))))
108
    colnames(profiles.inter.high) <- c("Groups","profile","N","Profile")
109
    scenarios <- lapply(1:nrow(profiles.inter.high),function(x){
110
      geneList <- as.numeric(gsub(".*=","",unlist(strsplit(as.character(profiles.inter.high$profile[x]),split=","))))
111
    })
112
    profiles.inter.high$profile <- unlist(lapply(1:nrow(profiles.inter.high),function(x){
113
      temp <- unlist(strsplit(as.character(profiles.inter.high$profile[x]),split = ","))
114
      prof.temp <- temp[grep("=1",temp)]
115
      prof <- toString(gsub("=1","",prof.temp))
116
    }) )
117
    scenarios <- as.data.frame(do.call(rbind, scenarios))
118
    colnames(scenarios) <- useGenes
119
    #Prob3YSurvival <- as.numeric(summary(survfit(fit.topHits, newdata = scenarios, se.fit = F, conf.int = F), times = (3*MD))$surv)
120
    #Prob5YSurvival <- as.numeric(summary(survfit(fit.topHits, newdata = scenarios, se.fit = F, conf.int = F), times = (5*MD))$surv)
121
    #profiles.inter.high <- profiles.inter.high #cbind(profiles.inter.high,Prob3YSurvival,Prob5YSurvival)
122
    profiles.inter.high$profile[which(profiles.inter.high$profile =="")] <- "No mutants"
123
    top.scenarios[start:(start+2),] <- scenarios[1:3,]
124
    rownames(top.scenarios)[start:(start+2)] <- c("High-Intermediate : Profile 1","High-Intermediate : Profile 2","High-Intermediate : Profile 3")
125
    start <- start+3}
126
  
127
  ## INTERMEDIATE
128
  if(numGroups==3){
129
    inter.pie <- try(filter(make.pie.data,Groups == "Intermediate"))
130
    profiles.inter <- cbind(count.dups(inter.pie)[order(-count.dups(inter.pie)$N),],
131
                            paste("Profile",1:nrow(count.dups(inter.pie))))
132
    colnames(profiles.inter) <- c("Groups","profile","N","Profile")
133
    scenarios <- lapply(1:nrow(profiles.inter),function(x){
134
      geneList <- as.numeric(gsub(".*=","",unlist(strsplit(as.character(profiles.inter$profile[x]),split=","))))
135
    })
136
    profiles.inter$profile <- unlist(lapply(1:nrow(profiles.inter),function(x){
137
      temp <- unlist(strsplit(as.character(profiles.inter$profile[x]),split = ","))
138
      prof.temp <- temp[grep("=1",temp)]
139
      prof <- toString(gsub("=1","",prof.temp))
140
    }) )
141
    scenarios <- as.data.frame(do.call(rbind, scenarios))
142
    colnames(scenarios) <- useGenes
143
    #Prob3YSurvival <- as.numeric(summary(survfit(fit.topHits, newdata = scenarios, se.fit = F, conf.int = F), times = (3*MD))$surv)
144
    #Prob5YSurvival <- as.numeric(summary(survfit(fit.topHits, newdata = scenarios, se.fit = F, conf.int = F), times = (5*MD))$surv)
145
    #profiles.inter <- profiles.inter #cbind(profiles.inter,Prob3YSurvival,Prob5YSurvival)
146
    profiles.inter$profile[which(profiles.inter$profile =="")] <- "No mutants"
147
    top.scenarios[start:(start+2),] <- scenarios[1:3,]
148
    rownames(top.scenarios)[start:(start+2)] <- c("Intermediate : Profile 1","Intermediate : Profile 2","Intermediate : Profile 3")
149
    start <- start+3}
150
  
151
  
152
  ## LOW INTER
153
  if(numGroups==4){
154
    low.inter.pie <- try(filter(make.pie.data,Groups == "Low-intermediate"))
155
    profiles.inter.low <- cbind(count.dups(low.inter.pie)[order(-count.dups(low.inter.pie)$N),],
156
                                paste("Profile",1:nrow(count.dups(low.inter.pie))))
157
    colnames(profiles.inter.low) <- c("Groups","profile","N","Profile")
158
    scenarios <- lapply(1:nrow(profiles.inter.low),function(x){
159
      geneList <- as.numeric(gsub(".*=","",unlist(strsplit(as.character(profiles.inter.low$profile[x]),split=","))))
160
    })
161
    profiles.inter.low$profile <- unlist(lapply(1:nrow(profiles.inter.low),function(x){
162
      temp <- unlist(strsplit(as.character(profiles.inter.low$profile[x]),split = ","))
163
      prof.temp <- temp[grep("=1",temp)]
164
      prof <- toString(gsub("=1","",prof.temp))
165
    }) )
166
    scenarios <- as.data.frame(do.call(rbind, scenarios))
167
    colnames(scenarios) <- useGenes
168
    #Prob3YSurvival <- as.numeric(summary(survfit(fit.topHits, newdata = scenarios, se.fit = F, conf.int = F), times = (3*MD))$surv)
169
    #Prob5YSurvival <- as.numeric(summary(survfit(fit.topHits, newdata = scenarios, se.fit = F, conf.int = F), times = (5*MD))$surv)
170
    profiles.inter.low <- profiles.inter.low #cbind(profiles.inter.low,Prob3YSurvival,Prob5YSurvival)
171
    profiles.inter.low$profile[which(profiles.inter.low$profile =="")] <- "No mutants"
172
    top.scenarios[start:(start+2),] <- scenarios[1:3,]
173
    rownames(top.scenarios)[start:(start+2)] <- c("Low-Intermediate : Profile 1","Low-Intermediate : Profile 2","Low-Intermediate : Profile 3")
174
    start <- start+3
175
    }
176
  
177
  ## LOW
178
  low.pie <- try(filter(make.pie.data,Groups == "Low"))
179
  profiles.low <- cbind(count.dups(low.pie)[order(-count.dups(low.pie)$N),],
180
                        paste("Profile",1:nrow(count.dups(low.pie))))
181
  colnames(profiles.low) <- c("Groups","profile","N","Profile")
182
  scenarios <- lapply(1:nrow(profiles.low),function(x){
183
    geneList <- as.numeric(gsub(".*=","",unlist(strsplit(as.character(profiles.low$profile[x]),split=","))))
184
  })
185
  profiles.low$profile <- unlist(lapply(1:nrow(profiles.low),function(x){
186
    temp <- unlist(strsplit(as.character(profiles.low$profile[x]),split = ","))
187
    prof.temp <- temp[grep("=1",temp)]
188
    prof <- toString(gsub("=1","",prof.temp))
189
  }) )
190
  scenarios <- as.data.frame(do.call(rbind, scenarios))
191
  colnames(scenarios) <- useGenes
192
  #Prob3YSurvival <- as.numeric(summary(survfit(fit.topHits, newdata = scenarios, se.fit = F, conf.int = F), times = (3*MD))$surv)
193
  #Prob5YSurvival <- as.numeric(summary(survfit(fit.topHits, newdata = scenarios, se.fit = F, conf.int = F), times = (5*MD))$surv)
194
  #profiles.low <- cbind(profiles.low,Prob3YSurvival,Prob5YSurvival)
195
  profiles.low$profile[which(profiles.low$profile =="")] <- "No mutants"
196
  top.scenarios[start:(start+2),] <- scenarios[1:3,]
197
  rownames(top.scenarios)[start:(start+2)] <- c("Low : Profile 1","Low : Profile 2","Low : Profile 3")
198
  start <- start+3
199
  
200
201
  
202
  if(numGroups == 2){
203
    pie.chart <- plot_ly() %>%
204
      add_pie(data = profiles.low, labels = ~profile, values = ~N,
205
              name = "Low Group", domain = list(x = c(0, 0.4), y = c(0.3, 0.8)),
206
              hoverinfo="hovertext",
207
              hovertext = ~paste('',Profile
208
                                 #'</br> 3 year survival',round(Prob3YSurvival,digits = 2),
209
                                 #'</br> 5 year survival',round(Prob5YSurvival,digits = 2)
210
                                 ),
211
              type="pie") %>%
212
      add_pie(data = profiles.high, labels = ~profile, values = ~N,
213
              name = "High Group", domain = list(x = c(0.45, 0.95), y = c(0.3, 0.8)),hoverinfo="hovertext",
214
              hovertext = ~paste('',Profile
215
                                 #'</br> 3 year survival',round(Prob3YSurvival,digits = 2),
216
                                 #'</br> 5 year survival',round(Prob5YSurvival,digits = 2)
217
                                 ),
218
              type="pie") %>%
219
      layout(title = "Genetic Profile Distribution", showlegend = F,
220
             xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
221
             yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))
222
  }
223
  
224
  if(numGroups == 3){
225
    pie.chart <- plot_ly() %>%
226
      add_pie(data = profiles.low, labels = ~profile, values = ~N,
227
              name = "Low Group", domain = list(x = c(0, 0.4), y = c(0.5, 1)),hoverinfo="hovertext",
228
              hovertext = ~paste('',Profile
229
                                 #'</br> 3 year survival',round(Prob3YSurvival,digits = 2),
230
                                 #'</br> 5 year survival',round(Prob5YSurvival,digits = 2)
231
                                 ),
232
              type="pie") %>%
233
      add_pie(data = profiles.inter, labels = ~profile, values = ~N,
234
              name = "Intermediate Group", domain = list(x = c(0.45, 0.95), y = c(0.5, 1)),hoverinfo="hovertext",
235
              hovertext = ~paste('',Profile
236
                                 #'</br> 3 year survival',round(Prob3YSurvival,digits = 2),
237
                                 #'</br> 5 year survival',round(Prob5YSurvival,digits = 2)
238
                                 ),
239
              type="pie") %>%
240
      add_pie(data = profiles.high, labels = ~profile, values = ~N,
241
              name = "High Group", domain = list(x = c(0.2, 0.6), y = c(0, 0.5)),hoverinfo="hovertext",
242
              hovertext = ~paste('',Profile
243
                                 #'</br> 3 year survival',round(Prob3YSurvival,digits = 2),
244
                                 #'</br> 5 year survival',round(Prob5YSurvival,digits = 2)
245
                                 ),
246
              type="pie") %>%
247
      layout(title = "Genetic Profile Distribution", showlegend = F,
248
             xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
249
             yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))
250
  }
251
  
252
  if(numGroups == 4){
253
    pie.chart <- plot_ly() %>%
254
      add_pie(data = profiles.low, labels = ~profile, values = ~N,
255
              name = "Lo", domain = list(x = c(0, 0.22), y = c(0, 0.9)),hoverinfo="hovertext",
256
              hovertext = ~paste('',Profile
257
                                 #'</br> Mutant genes',profile
258
                                 #'</br> 3 year survival',round(Prob3YSurvival,digits = 2),
259
                                 #'</br> 5 year survival',round(Prob5YSurvival,digits = 2)
260
                                 ),
261
              type="pie") %>%
262
      add_pie(data = profiles.inter.low, labels = ~profile, values = ~N,
263
              name = "ILo", domain = list(x = c(0.26, 0.48), y = c(0, 0.9)),hoverinfo="hovertext",
264
              hovertext = ~paste('',Profile
265
                                 #'</br> Mutant genes',profile
266
                                 #'</br> 3 year survival',round(Prob3YSurvival,digits = 2),
267
                                 #'</br> 5 year survival',round(Prob5YSurvival,digits = 2)
268
                                 ),
269
              type="pie") %>%
270
      add_pie(data = profiles.inter.high, labels = ~profile, values = ~N,
271
              name = "IHi", domain = list(x = c(0.52, 0.74), y = c(0, 0.9)),hoverinfo="hovertext",
272
              hovertext = ~paste('',Profile
273
                                 #'</br> Mutant genes',profile
274
                                 #'</br> 3 year survival',round(Prob3YSurvival,digits = 2),
275
                                 #'</br> 5 year survival',round(Prob5YSurvival,digits = 2)
276
                                 ),
277
              type="pie") %>%
278
      add_pie(data = profiles.high, labels = ~profile, values = ~N,
279
              name = "Hi", domain = list(x = c(0.78, 1), y = c(0, 0.9)),hoverinfo="hovertext",
280
              hovertext = ~paste('',Profile
281
                                 #'</br> Mutant genes',profile
282
                                 #'</br> 3 year survival',round(Prob3YSurvival,digits = 2),
283
                                 #'</br> 5 year survival',round(Prob5YSurvival,digits = 2)
284
                                 ),
285
              type="pie") %>%
286
      add_annotations(x= 0.05, y= 1, xref = "paper", yref = "paper", text = "<b>Low risk</b>", showarrow = F) %>%
287
      add_annotations(x= 0.3, y= 1, xref = "paper", yref = "paper", text = "<b>Intermediate low</b>", showarrow = F) %>%
288
      add_annotations(x= 0.61, y= 1, xref = "paper", yref = "paper", text = "<b>Intermediate high</b>", showarrow = F) %>%
289
      add_annotations(x= 0.9, y= 1, xref = "paper", yref = "paper", text = "<b>High risk</b>", showarrow = F) %>%
290
      layout(title = "Genetic Profile Distribution", showlegend = F,
291
             xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
292
             yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))
293
  }
294
  
295
  return(list("PieChart"=pie.chart,"GenesUsed"=paste0(useGenes,collapse = ",")))
296
}
297
298
# RiskGroupsResults <- MakeKM(data,average.risk,topHits,LT,numGroups,cuts,geneList = NULL)
299
# 
300
# # Make2KM(data = data.gen,average.risk = averaged.risk.gen.boot.bag,
301
# #         file = "GenOnly_Bagging",type.data = "GenOnly")
302
# save(RiskGroupsResults,file="RiskGroupsResults.Rdata")