|
a |
|
b/Scripts/MakeKM.R |
|
|
1 |
MakeKM <- function(data,average.risk,topHits,LT,numGroups,cuts,geneList){ |
|
|
2 |
|
|
|
3 |
data$lvl4Groups <- rep(NA,nrow(data)) |
|
|
4 |
qts <- as.numeric(quantile(average.risk,cuts)) |
|
|
5 |
|
|
|
6 |
if(numGroups ==2){ |
|
|
7 |
for(i in 1:nrow(data)){ |
|
|
8 |
|
|
|
9 |
# for 2 level groups |
|
|
10 |
if(average.risk[i] < qts) { |
|
|
11 |
data$lvl4Groups[i] <- "Low" |
|
|
12 |
} |
|
|
13 |
|
|
|
14 |
if(average.risk[i] >= qts) { |
|
|
15 |
data$lvl4Groups[i] <- "High" |
|
|
16 |
} |
|
|
17 |
} |
|
|
18 |
} |
|
|
19 |
|
|
|
20 |
if(numGroups == 3){ |
|
|
21 |
|
|
|
22 |
for(i in 1:nrow(data)){ |
|
|
23 |
|
|
|
24 |
# for 3 level groups |
|
|
25 |
if(average.risk[i] < qts[1]) { |
|
|
26 |
data$lvl4Groups[i] <- "Low" |
|
|
27 |
} |
|
|
28 |
if(average.risk[i] >= qts[1] && average.risk[i] < qts[2]){ |
|
|
29 |
data$lvl4Groups[i] <- "Intermediate" |
|
|
30 |
} |
|
|
31 |
if(average.risk[i] >= qts[2]) { |
|
|
32 |
data$lvl4Groups[i] <- "High" |
|
|
33 |
} |
|
|
34 |
} |
|
|
35 |
} |
|
|
36 |
|
|
|
37 |
if(numGroups == 4){ |
|
|
38 |
#qts <- quantile(average.risk,probs = c(0.25,0.75,0.9)) |
|
|
39 |
for(i in 1:nrow(data)){ |
|
|
40 |
|
|
|
41 |
# for 4 level groups |
|
|
42 |
if(average.risk[i] < qts[1]) { |
|
|
43 |
data$lvl4Groups[i] <- "Low" |
|
|
44 |
} |
|
|
45 |
if(average.risk[i] >= qts[1] && average.risk[i] < qts[2]){ |
|
|
46 |
data$lvl4Groups[i] <- "Low-intermediate" |
|
|
47 |
} |
|
|
48 |
if(average.risk[i] >= qts[2] && average.risk[i] < qts[3]){ |
|
|
49 |
data$lvl4Groups[i] <- "High-intermediate" |
|
|
50 |
} |
|
|
51 |
if(average.risk[i] >= qts[3]) { |
|
|
52 |
data$lvl4Groups[i] <- "High" |
|
|
53 |
} |
|
|
54 |
} |
|
|
55 |
} |
|
|
56 |
|
|
|
57 |
|
|
|
58 |
count.dups <- function(DF){ |
|
|
59 |
|
|
|
60 |
DT <- data.table(DF) |
|
|
61 |
DT[,.N, by = names(DT)] |
|
|
62 |
} |
|
|
63 |
|
|
|
64 |
#### |
|
|
65 |
if(length(geneList) == 0) {useGenes <- topHits[1:5]} |
|
|
66 |
else{ |
|
|
67 |
geneList <- gsub(" ","",geneList) |
|
|
68 |
useGenes <- geneList} |
|
|
69 |
pie.data <- data[,match(useGenes,colnames(data))] |
|
|
70 |
Groups <- as.character(data[,match("lvl4Groups",colnames(data))]) |
|
|
71 |
profile <- apply(pie.data,1,function(x){ |
|
|
72 |
return(paste0(paste0(names(x),"=",as.numeric(x),collapse =","))) |
|
|
73 |
}) |
|
|
74 |
make.pie.data <- as.data.frame(cbind(Groups,profile)) |
|
|
75 |
|
|
|
76 |
## Save top 2 scenarios for KM |
|
|
77 |
top.scenarios <- as.data.frame(matrix(nrow = 8,ncol = 5)) |
|
|
78 |
colnames(top.scenarios) <- useGenes |
|
|
79 |
start <-1 |
|
|
80 |
|
|
|
81 |
# high |
|
|
82 |
high.pie <- try(filter(make.pie.data,Groups == "High")) |
|
|
83 |
profiles.high <- cbind(count.dups(high.pie)[order(-count.dups(high.pie)$N),], |
|
|
84 |
paste("Profile",1:nrow(count.dups(high.pie)))) |
|
|
85 |
colnames(profiles.high) <- c("Groups","profile","N","Profile") |
|
|
86 |
scenarios <- lapply(1:nrow(profiles.high),function(x){ |
|
|
87 |
geneList <- as.numeric(gsub(".*=","",unlist(strsplit(as.character(profiles.high$profile[x]),split=",")))) |
|
|
88 |
}) |
|
|
89 |
profiles.high$profile <- unlist(lapply(1:nrow(profiles.high),function(x){ |
|
|
90 |
temp <- unlist(strsplit(as.character(profiles.high$profile[x]),split = ",")) |
|
|
91 |
prof.temp <- temp[grep("=1",temp)] |
|
|
92 |
prof <- toString(gsub("=1","",prof.temp)) |
|
|
93 |
}) ) |
|
|
94 |
scenarios <- as.data.frame(do.call(rbind, scenarios)) |
|
|
95 |
colnames(scenarios) <- useGenes |
|
|
96 |
#Prob3YSurvival <- as.numeric(summary(survfit(fit.topHits, newdata = scenarios, se.fit = F, conf.int = F), times = (3*MD))$surv) |
|
|
97 |
#Prob5YSurvival <- as.numeric(summary(survfit(fit.topHits, newdata = scenarios, se.fit = F, conf.int = F), times = (5*MD))$surv) |
|
|
98 |
#profiles.high <- profiles.high #cbind(profiles.high,Prob3YSurvival,Prob5YSurvival) |
|
|
99 |
profiles.high$profile[which(profiles.high$profile =="")] <- "No mutants" |
|
|
100 |
top.scenarios[start:(start+2),] <- scenarios[1:3,] |
|
|
101 |
rownames(top.scenarios)[start:(start+2)] <- c("High : Profile 1","High : Profile 2","High : Profile 3") |
|
|
102 |
start <- start+3 |
|
|
103 |
## Inter high |
|
|
104 |
if(numGroups==4){ |
|
|
105 |
high.inter.pie <- try(filter(make.pie.data,Groups == "High-intermediate")) |
|
|
106 |
profiles.inter.high <- cbind(count.dups(high.inter.pie)[order(-count.dups(high.inter.pie)$N),], |
|
|
107 |
paste("Profile",1:nrow(count.dups(high.inter.pie)))) |
|
|
108 |
colnames(profiles.inter.high) <- c("Groups","profile","N","Profile") |
|
|
109 |
scenarios <- lapply(1:nrow(profiles.inter.high),function(x){ |
|
|
110 |
geneList <- as.numeric(gsub(".*=","",unlist(strsplit(as.character(profiles.inter.high$profile[x]),split=",")))) |
|
|
111 |
}) |
|
|
112 |
profiles.inter.high$profile <- unlist(lapply(1:nrow(profiles.inter.high),function(x){ |
|
|
113 |
temp <- unlist(strsplit(as.character(profiles.inter.high$profile[x]),split = ",")) |
|
|
114 |
prof.temp <- temp[grep("=1",temp)] |
|
|
115 |
prof <- toString(gsub("=1","",prof.temp)) |
|
|
116 |
}) ) |
|
|
117 |
scenarios <- as.data.frame(do.call(rbind, scenarios)) |
|
|
118 |
colnames(scenarios) <- useGenes |
|
|
119 |
#Prob3YSurvival <- as.numeric(summary(survfit(fit.topHits, newdata = scenarios, se.fit = F, conf.int = F), times = (3*MD))$surv) |
|
|
120 |
#Prob5YSurvival <- as.numeric(summary(survfit(fit.topHits, newdata = scenarios, se.fit = F, conf.int = F), times = (5*MD))$surv) |
|
|
121 |
#profiles.inter.high <- profiles.inter.high #cbind(profiles.inter.high,Prob3YSurvival,Prob5YSurvival) |
|
|
122 |
profiles.inter.high$profile[which(profiles.inter.high$profile =="")] <- "No mutants" |
|
|
123 |
top.scenarios[start:(start+2),] <- scenarios[1:3,] |
|
|
124 |
rownames(top.scenarios)[start:(start+2)] <- c("High-Intermediate : Profile 1","High-Intermediate : Profile 2","High-Intermediate : Profile 3") |
|
|
125 |
start <- start+3} |
|
|
126 |
|
|
|
127 |
## INTERMEDIATE |
|
|
128 |
if(numGroups==3){ |
|
|
129 |
inter.pie <- try(filter(make.pie.data,Groups == "Intermediate")) |
|
|
130 |
profiles.inter <- cbind(count.dups(inter.pie)[order(-count.dups(inter.pie)$N),], |
|
|
131 |
paste("Profile",1:nrow(count.dups(inter.pie)))) |
|
|
132 |
colnames(profiles.inter) <- c("Groups","profile","N","Profile") |
|
|
133 |
scenarios <- lapply(1:nrow(profiles.inter),function(x){ |
|
|
134 |
geneList <- as.numeric(gsub(".*=","",unlist(strsplit(as.character(profiles.inter$profile[x]),split=",")))) |
|
|
135 |
}) |
|
|
136 |
profiles.inter$profile <- unlist(lapply(1:nrow(profiles.inter),function(x){ |
|
|
137 |
temp <- unlist(strsplit(as.character(profiles.inter$profile[x]),split = ",")) |
|
|
138 |
prof.temp <- temp[grep("=1",temp)] |
|
|
139 |
prof <- toString(gsub("=1","",prof.temp)) |
|
|
140 |
}) ) |
|
|
141 |
scenarios <- as.data.frame(do.call(rbind, scenarios)) |
|
|
142 |
colnames(scenarios) <- useGenes |
|
|
143 |
#Prob3YSurvival <- as.numeric(summary(survfit(fit.topHits, newdata = scenarios, se.fit = F, conf.int = F), times = (3*MD))$surv) |
|
|
144 |
#Prob5YSurvival <- as.numeric(summary(survfit(fit.topHits, newdata = scenarios, se.fit = F, conf.int = F), times = (5*MD))$surv) |
|
|
145 |
#profiles.inter <- profiles.inter #cbind(profiles.inter,Prob3YSurvival,Prob5YSurvival) |
|
|
146 |
profiles.inter$profile[which(profiles.inter$profile =="")] <- "No mutants" |
|
|
147 |
top.scenarios[start:(start+2),] <- scenarios[1:3,] |
|
|
148 |
rownames(top.scenarios)[start:(start+2)] <- c("Intermediate : Profile 1","Intermediate : Profile 2","Intermediate : Profile 3") |
|
|
149 |
start <- start+3} |
|
|
150 |
|
|
|
151 |
|
|
|
152 |
## LOW INTER |
|
|
153 |
if(numGroups==4){ |
|
|
154 |
low.inter.pie <- try(filter(make.pie.data,Groups == "Low-intermediate")) |
|
|
155 |
profiles.inter.low <- cbind(count.dups(low.inter.pie)[order(-count.dups(low.inter.pie)$N),], |
|
|
156 |
paste("Profile",1:nrow(count.dups(low.inter.pie)))) |
|
|
157 |
colnames(profiles.inter.low) <- c("Groups","profile","N","Profile") |
|
|
158 |
scenarios <- lapply(1:nrow(profiles.inter.low),function(x){ |
|
|
159 |
geneList <- as.numeric(gsub(".*=","",unlist(strsplit(as.character(profiles.inter.low$profile[x]),split=",")))) |
|
|
160 |
}) |
|
|
161 |
profiles.inter.low$profile <- unlist(lapply(1:nrow(profiles.inter.low),function(x){ |
|
|
162 |
temp <- unlist(strsplit(as.character(profiles.inter.low$profile[x]),split = ",")) |
|
|
163 |
prof.temp <- temp[grep("=1",temp)] |
|
|
164 |
prof <- toString(gsub("=1","",prof.temp)) |
|
|
165 |
}) ) |
|
|
166 |
scenarios <- as.data.frame(do.call(rbind, scenarios)) |
|
|
167 |
colnames(scenarios) <- useGenes |
|
|
168 |
#Prob3YSurvival <- as.numeric(summary(survfit(fit.topHits, newdata = scenarios, se.fit = F, conf.int = F), times = (3*MD))$surv) |
|
|
169 |
#Prob5YSurvival <- as.numeric(summary(survfit(fit.topHits, newdata = scenarios, se.fit = F, conf.int = F), times = (5*MD))$surv) |
|
|
170 |
profiles.inter.low <- profiles.inter.low #cbind(profiles.inter.low,Prob3YSurvival,Prob5YSurvival) |
|
|
171 |
profiles.inter.low$profile[which(profiles.inter.low$profile =="")] <- "No mutants" |
|
|
172 |
top.scenarios[start:(start+2),] <- scenarios[1:3,] |
|
|
173 |
rownames(top.scenarios)[start:(start+2)] <- c("Low-Intermediate : Profile 1","Low-Intermediate : Profile 2","Low-Intermediate : Profile 3") |
|
|
174 |
start <- start+3 |
|
|
175 |
} |
|
|
176 |
|
|
|
177 |
## LOW |
|
|
178 |
low.pie <- try(filter(make.pie.data,Groups == "Low")) |
|
|
179 |
profiles.low <- cbind(count.dups(low.pie)[order(-count.dups(low.pie)$N),], |
|
|
180 |
paste("Profile",1:nrow(count.dups(low.pie)))) |
|
|
181 |
colnames(profiles.low) <- c("Groups","profile","N","Profile") |
|
|
182 |
scenarios <- lapply(1:nrow(profiles.low),function(x){ |
|
|
183 |
geneList <- as.numeric(gsub(".*=","",unlist(strsplit(as.character(profiles.low$profile[x]),split=",")))) |
|
|
184 |
}) |
|
|
185 |
profiles.low$profile <- unlist(lapply(1:nrow(profiles.low),function(x){ |
|
|
186 |
temp <- unlist(strsplit(as.character(profiles.low$profile[x]),split = ",")) |
|
|
187 |
prof.temp <- temp[grep("=1",temp)] |
|
|
188 |
prof <- toString(gsub("=1","",prof.temp)) |
|
|
189 |
}) ) |
|
|
190 |
scenarios <- as.data.frame(do.call(rbind, scenarios)) |
|
|
191 |
colnames(scenarios) <- useGenes |
|
|
192 |
#Prob3YSurvival <- as.numeric(summary(survfit(fit.topHits, newdata = scenarios, se.fit = F, conf.int = F), times = (3*MD))$surv) |
|
|
193 |
#Prob5YSurvival <- as.numeric(summary(survfit(fit.topHits, newdata = scenarios, se.fit = F, conf.int = F), times = (5*MD))$surv) |
|
|
194 |
#profiles.low <- cbind(profiles.low,Prob3YSurvival,Prob5YSurvival) |
|
|
195 |
profiles.low$profile[which(profiles.low$profile =="")] <- "No mutants" |
|
|
196 |
top.scenarios[start:(start+2),] <- scenarios[1:3,] |
|
|
197 |
rownames(top.scenarios)[start:(start+2)] <- c("Low : Profile 1","Low : Profile 2","Low : Profile 3") |
|
|
198 |
start <- start+3 |
|
|
199 |
|
|
|
200 |
|
|
|
201 |
|
|
|
202 |
if(numGroups == 2){ |
|
|
203 |
pie.chart <- plot_ly() %>% |
|
|
204 |
add_pie(data = profiles.low, labels = ~profile, values = ~N, |
|
|
205 |
name = "Low Group", domain = list(x = c(0, 0.4), y = c(0.3, 0.8)), |
|
|
206 |
hoverinfo="hovertext", |
|
|
207 |
hovertext = ~paste('',Profile |
|
|
208 |
#'</br> 3 year survival',round(Prob3YSurvival,digits = 2), |
|
|
209 |
#'</br> 5 year survival',round(Prob5YSurvival,digits = 2) |
|
|
210 |
), |
|
|
211 |
type="pie") %>% |
|
|
212 |
add_pie(data = profiles.high, labels = ~profile, values = ~N, |
|
|
213 |
name = "High Group", domain = list(x = c(0.45, 0.95), y = c(0.3, 0.8)),hoverinfo="hovertext", |
|
|
214 |
hovertext = ~paste('',Profile |
|
|
215 |
#'</br> 3 year survival',round(Prob3YSurvival,digits = 2), |
|
|
216 |
#'</br> 5 year survival',round(Prob5YSurvival,digits = 2) |
|
|
217 |
), |
|
|
218 |
type="pie") %>% |
|
|
219 |
layout(title = "Genetic Profile Distribution", showlegend = F, |
|
|
220 |
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE), |
|
|
221 |
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)) |
|
|
222 |
} |
|
|
223 |
|
|
|
224 |
if(numGroups == 3){ |
|
|
225 |
pie.chart <- plot_ly() %>% |
|
|
226 |
add_pie(data = profiles.low, labels = ~profile, values = ~N, |
|
|
227 |
name = "Low Group", domain = list(x = c(0, 0.4), y = c(0.5, 1)),hoverinfo="hovertext", |
|
|
228 |
hovertext = ~paste('',Profile |
|
|
229 |
#'</br> 3 year survival',round(Prob3YSurvival,digits = 2), |
|
|
230 |
#'</br> 5 year survival',round(Prob5YSurvival,digits = 2) |
|
|
231 |
), |
|
|
232 |
type="pie") %>% |
|
|
233 |
add_pie(data = profiles.inter, labels = ~profile, values = ~N, |
|
|
234 |
name = "Intermediate Group", domain = list(x = c(0.45, 0.95), y = c(0.5, 1)),hoverinfo="hovertext", |
|
|
235 |
hovertext = ~paste('',Profile |
|
|
236 |
#'</br> 3 year survival',round(Prob3YSurvival,digits = 2), |
|
|
237 |
#'</br> 5 year survival',round(Prob5YSurvival,digits = 2) |
|
|
238 |
), |
|
|
239 |
type="pie") %>% |
|
|
240 |
add_pie(data = profiles.high, labels = ~profile, values = ~N, |
|
|
241 |
name = "High Group", domain = list(x = c(0.2, 0.6), y = c(0, 0.5)),hoverinfo="hovertext", |
|
|
242 |
hovertext = ~paste('',Profile |
|
|
243 |
#'</br> 3 year survival',round(Prob3YSurvival,digits = 2), |
|
|
244 |
#'</br> 5 year survival',round(Prob5YSurvival,digits = 2) |
|
|
245 |
), |
|
|
246 |
type="pie") %>% |
|
|
247 |
layout(title = "Genetic Profile Distribution", showlegend = F, |
|
|
248 |
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE), |
|
|
249 |
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)) |
|
|
250 |
} |
|
|
251 |
|
|
|
252 |
if(numGroups == 4){ |
|
|
253 |
pie.chart <- plot_ly() %>% |
|
|
254 |
add_pie(data = profiles.low, labels = ~profile, values = ~N, |
|
|
255 |
name = "Lo", domain = list(x = c(0, 0.22), y = c(0, 0.9)),hoverinfo="hovertext", |
|
|
256 |
hovertext = ~paste('',Profile |
|
|
257 |
#'</br> Mutant genes',profile |
|
|
258 |
#'</br> 3 year survival',round(Prob3YSurvival,digits = 2), |
|
|
259 |
#'</br> 5 year survival',round(Prob5YSurvival,digits = 2) |
|
|
260 |
), |
|
|
261 |
type="pie") %>% |
|
|
262 |
add_pie(data = profiles.inter.low, labels = ~profile, values = ~N, |
|
|
263 |
name = "ILo", domain = list(x = c(0.26, 0.48), y = c(0, 0.9)),hoverinfo="hovertext", |
|
|
264 |
hovertext = ~paste('',Profile |
|
|
265 |
#'</br> Mutant genes',profile |
|
|
266 |
#'</br> 3 year survival',round(Prob3YSurvival,digits = 2), |
|
|
267 |
#'</br> 5 year survival',round(Prob5YSurvival,digits = 2) |
|
|
268 |
), |
|
|
269 |
type="pie") %>% |
|
|
270 |
add_pie(data = profiles.inter.high, labels = ~profile, values = ~N, |
|
|
271 |
name = "IHi", domain = list(x = c(0.52, 0.74), y = c(0, 0.9)),hoverinfo="hovertext", |
|
|
272 |
hovertext = ~paste('',Profile |
|
|
273 |
#'</br> Mutant genes',profile |
|
|
274 |
#'</br> 3 year survival',round(Prob3YSurvival,digits = 2), |
|
|
275 |
#'</br> 5 year survival',round(Prob5YSurvival,digits = 2) |
|
|
276 |
), |
|
|
277 |
type="pie") %>% |
|
|
278 |
add_pie(data = profiles.high, labels = ~profile, values = ~N, |
|
|
279 |
name = "Hi", domain = list(x = c(0.78, 1), y = c(0, 0.9)),hoverinfo="hovertext", |
|
|
280 |
hovertext = ~paste('',Profile |
|
|
281 |
#'</br> Mutant genes',profile |
|
|
282 |
#'</br> 3 year survival',round(Prob3YSurvival,digits = 2), |
|
|
283 |
#'</br> 5 year survival',round(Prob5YSurvival,digits = 2) |
|
|
284 |
), |
|
|
285 |
type="pie") %>% |
|
|
286 |
add_annotations(x= 0.05, y= 1, xref = "paper", yref = "paper", text = "<b>Low risk</b>", showarrow = F) %>% |
|
|
287 |
add_annotations(x= 0.3, y= 1, xref = "paper", yref = "paper", text = "<b>Intermediate low</b>", showarrow = F) %>% |
|
|
288 |
add_annotations(x= 0.61, y= 1, xref = "paper", yref = "paper", text = "<b>Intermediate high</b>", showarrow = F) %>% |
|
|
289 |
add_annotations(x= 0.9, y= 1, xref = "paper", yref = "paper", text = "<b>High risk</b>", showarrow = F) %>% |
|
|
290 |
layout(title = "Genetic Profile Distribution", showlegend = F, |
|
|
291 |
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE), |
|
|
292 |
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)) |
|
|
293 |
} |
|
|
294 |
|
|
|
295 |
return(list("PieChart"=pie.chart,"GenesUsed"=paste0(useGenes,collapse = ","))) |
|
|
296 |
} |
|
|
297 |
|
|
|
298 |
# RiskGroupsResults <- MakeKM(data,average.risk,topHits,LT,numGroups,cuts,geneList = NULL) |
|
|
299 |
# |
|
|
300 |
# # Make2KM(data = data.gen,average.risk = averaged.risk.gen.boot.bag, |
|
|
301 |
# # file = "GenOnly_Bagging",type.data = "GenOnly") |
|
|
302 |
# save(RiskGroupsResults,file="RiskGroupsResults.Rdata") |