|
a |
|
b/Scripts/GetResultsVarSelect.R |
|
|
1 |
########################################## |
|
|
2 |
###### AUTOMATIZE READING RESULTS ######## |
|
|
3 |
########################################## |
|
|
4 |
|
|
|
5 |
## INPUT : |
|
|
6 |
# 1) studyType : name of the IMPACT directory where the results/data are saved |
|
|
7 |
# 2) method : Method implemented to be analysed (LASSO,RF,GBM,CF) |
|
|
8 |
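# 3) geneList : genes to annotate on the volcano plot (may be NULL/empty for none); note that LT (left truncation) is not an argument, it is set inside the function |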
|
|
9 |
|
|
|
10 |
## OUTPUT : |
|
|
11 |
# For each of the methods to be analyzed will give |
|
|
12 |
# 1) CI distribution bar plot |
|
|
13 |
# 2) An influence plot of top hit genes |
|
|
14 |
# 3) Kaplan-Meier plot based on 4 risk groups |
|
|
15 |
# 4) Proportion of mutated genes in each of the 4 risk groups, per top hit gene |
|
|
16 |
|
|
|
17 |
# Build the LASSO "volcano" plot: mean coefficient (x) against selection
# frequency (y) across all loaded LASSO fits, one point per variable.
#
# Arguments:
#   studyType : study name. NOTE(review): currently unused -- the two input
#               paths below are hard-coded to the Lung study; confirm the file
#               naming scheme before deriving the paths from this argument.
#   method    : character vector of methods to analyse. Only "LASSO" is
#               handled by this function.
#   geneList  : optional character vector of gene names to annotate on the
#               plot. NULL / empty means no annotations; names that do not
#               appear in the results table are ignored.
#
# Returns:
#   list("selectInflPlot" = p) where p is the object produced by
#   plot_ly() %>% layout(), or NULL when "LASSO" is not in `method`.
#
# Side effects:
#   Reads "./Study/Lung/results/Lung_LASSO.Rdata" (expected to define an
#   object named LASSO) and "./Study/Lung/data/LungReadyStudy.csv", both
#   relative to the current working directory.
getResults <- function(studyType, method, geneList = NULL) {

  # load() restores the saved objects into this function's environment.
  load("./Study/Lung/results/Lung_LASSO.Rdata", envir = environment())
  data <- read.csv("./Study/Lung/data/LungReadyStudy.csv",
                   header = TRUE, row.names = 1)

  ## determine if left truncated
  # Not used by the LASSO plot; kept because the fuller return value sketched
  # at the bottom of the function refers to them.
  LT <- TRUE
  MD <- 12
  time.type <- "Months"

  # Stays NULL unless the LASSO branch runs, so the return below never refers
  # to an undefined object.
  selectInflPlot <- NULL

  ### LASSO ANALYSIS ###

  #try(setwd("./results/"))

  if ("LASSO" %in% method) {

    if (!exists("LASSO") || length(LASSO) == 0) {
      stop("No LASSO fits found in ./Study/Lung/results/Lung_LASSO.Rdata",
           call. = FALSE)
    }

    Variables <- colnames(data)

    # One row per fit, one column per variable; a variable that a fit did not
    # select keeps a coefficient of 0.
    allCoefs <- matrix(0, nrow = length(LASSO), ncol = length(Variables),
                       dimnames = list(NULL, Variables))

    for (x in seq_along(LASSO)) {
      coefsValues <- LASSO[[x]]$fit[, 1]
      cols <- match(names(coefsValues), Variables)
      # Skip coefficients whose names are not columns of `data`; an NA
      # subscript would otherwise abort the assignment.
      found <- !is.na(cols)
      allCoefs[x, cols[found]] <- as.numeric(coefsValues)[found]
    }
    allCoefs[is.na(allCoefs)] <- 0

    meanCoefs <- colMeans(allCoefs)
    # Share of fits in which the variable has a non-zero coefficient.
    selectFreq <- colMeans(allCoefs != 0)

    ## get mu freq
    # Column-wise sum / number of rows, computed per column so the data frame
    # is not coerced to a single-type matrix first.
    MutationFrequency <- vapply(data, function(col) sum(col) / length(col),
                                numeric(1))

    resultsAll <- data.frame(MeanCoefficient = meanCoefs,
                             SelectionFrequency = selectFreq,
                             MutationFrequency = MutationFrequency,
                             row.names = Variables)
    resultsAll <- resultsAll[complete.cases(resultsAll), ]
    resultsAll$GeneName <- rownames(resultsAll)
    resultsAll$MutationFrequency2 <- cut(resultsAll$MutationFrequency,
                                         c(0, 0.10, 0.20, 0.40))

    volcano <- plot_ly(data = resultsAll,
                       x = ~MeanCoefficient, y = ~SelectionFrequency,
                       text = ~paste('Gene :', GeneName,
                                     '</br> Hazard Ratio :',
                                     round(exp(MeanCoefficient), digits = 2)),
                       mode = "markers",
                       size = ~MutationFrequency, color = ~MutationFrequency)

    # Rows for the requested genes, in the order given; unknown genes dropped.
    hits <- match(geneList, rownames(resultsAll))
    m <- resultsAll[hits[!is.na(hits)], , drop = FALSE]

    if (nrow(m) > 0) {
      a <- list(
        x = m$MeanCoefficient,
        y = m$SelectionFrequency,
        text = rownames(m),
        xref = "x",
        yref = "y",
        showarrow = TRUE,
        arrowhead = 7,
        ax = 20,
        ay = -40
      )
      selectInflPlot <- volcano %>%
        layout(title = "Volcano Plot", annotations = a)
    } else {
      selectInflPlot <- volcano %>%
        layout(title = "Volcano Plot")
    }
  }

  # return(list("ciSummary" = CI.BP,"inflPlot" = influencePlot,"topHits" = topHits,"average.risk"=average.risk,"data.out"= data,
  #             "selectInflPlot" = selectInflPlot,"MethodUsed" = method,"RiskRefit"=refit.risk,"ClinRefitTable"=ClinRefitTable,
  #             "RiskHistogram"=RiskHistogram,"LassoFits"=allCoefs,"ClinRefit"=refit.risk.clin,"time.type"=time.type,"MD"=MD,
  #             "RiskScoreSummary"=as.data.frame(t(summary.RiskScore))))
  list("selectInflPlot" = selectInflPlot)
}
|
|
100 |
#test <- getResults(studyType="Lung",method="LASSO",geneList=NULL) |
|
|
101 |
|
|
|
102 |
|
|
|
103 |
# FirstRun <- getResults(studyType="Lung",method="LASSO",geneList = NULL) |
|
|
104 |
# save(FirstRun,file="FirstRun.Rdata") |