[a4ee51]: / R / predictLearner.R

Download this file

155 lines (125 with data), 6.8 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#' Make predictions using a trained 'IntegratedLearner' model
#'
#'@description This function makes predictions using a trained 'IntegratedLearner' model for new samples for which predictions are to be made
#'
#' @param fit fitted "IntegratedLearner" object
#' @param feature_table_valid Feature table from validation set. Must have the exact same structure as feature_table.
#' @param sample_metadata_valid OPTIONAL (can provide feature_table_valid and not this): Sample-specific metadata table from independent validation set. If provided, it must have the exact same structure as sample_metadata.
#' @param feature_metadata Matrix containing feature names and their corresponding layers. Must be same as that provided in IntegratedLearner object.
#'
#' @return Predicted values
#' @export
predict.learner <- function(fit,
feature_table_valid = NULL, # Feature table from validation set. Must have the exact same structure as feature_table. If missing, uses feature_table for feature_table_valid.
sample_metadata_valid = NULL, # Optional: Sample-specific metadata table from independent validation set. Must have the exact same structure as sample_metadata.
feature_metadata=NULL){
if(all(fit$feature.names==rownames(feature_metadata))==FALSE){
stop("Both training feature_table and feature_metadata should have the same rownames.")
}
if(is.null(feature_table_valid)){
stop("Feature table for validation set cannot be empty")
}
# if(is.null(sample_metadata_valid)){
# stop("Sample metadata for validation set cannot be empty")
# }
if (!is.null(feature_table_valid)){
if(all(fit$feature.names==rownames(feature_table_valid))==FALSE)
stop("Both feature_table and feature_table_valid should have the same rownames.")
}
if (!is.null(sample_metadata_valid)){
if(all(colnames(feature_table_valid)==rownames(sample_metadata_valid))==FALSE)
stop("Row names of sample_metadata_valid must match the column names of feature_table_valid")
}
if (!'featureID' %in% colnames(feature_metadata)){
stop("feature_metadata must have a column named 'featureID' describing per-feature unique identifiers.")
}
if (!'featureType' %in% colnames(feature_metadata)){
stop("feature_metadata must have a column named 'featureType' describing the corresponding source layers.")
}
if (!is.null(sample_metadata_valid)){
if (!'subjectID' %in% colnames(sample_metadata_valid)){
stop("sample_metadata_valid must have a column named 'subjectID' describing per-subject unique identifiers.")
}
if (!'Y' %in% colnames(sample_metadata_valid)){
stop("sample_metadata_valid must have a column named 'Y' describing the outcome of interest.")
}
}
#############################################################################################
# Extract validation Y right away (will not be used anywhere during the validation process) #
#############################################################################################
if (!is.null(sample_metadata_valid)){validY<-sample_metadata_valid['Y']}
#####################################################################
# Stacked generalization input data preparation for validation data #
#####################################################################
feature_metadata$featureType<-as.factor(feature_metadata$featureType)
name_layers<-with(droplevels(feature_metadata), list(levels = levels(featureType)),
nlevels = nlevels(featureType))$levels
X_test_layers <- vector("list", length(name_layers))
names(X_test_layers) <- name_layers
layer_wise_prediction_valid<-vector("list", length(name_layers))
names(layer_wise_prediction_valid)<-name_layers
for(i in seq_along(name_layers)){
############################################################
# Prepare single-omic validation data and save predictions #
############################################################
include_list<-feature_metadata %>% filter(featureType == name_layers[i])
t_dat_slice_valid<-feature_table_valid[rownames(feature_table_valid) %in% include_list$featureID, ]
dat_slice_valid<-as.data.frame(t(t_dat_slice_valid))
X_test_layers[[i]] <- dat_slice_valid
layer_wise_prediction_valid[[i]]<-predict.SuperLearner(fit$SL_fits$SL_fit_layers[[i]], newdata = dat_slice_valid)$pred
rownames(layer_wise_prediction_valid[[i]])<-rownames(dat_slice_valid)
rm(dat_slice_valid); rm(include_list)
}
combo_valid <- as.data.frame(do.call(cbind, layer_wise_prediction_valid))
names(combo_valid)<-name_layers
if(fit$run_stacked==TRUE){
stacked_prediction_valid<-predict.SuperLearner(fit$SL_fits$SL_fit_stacked, newdata = combo_valid)$pred
rownames(stacked_prediction_valid)<-rownames(combo_valid)
}
if(fit$run_concat==TRUE){
fulldat_valid<-as.data.frame(t(feature_table_valid))
concat_prediction_valid<-predict.SuperLearner(fit$SL_fits$SL_fit_concat,
newdata = fulldat_valid)$pred
rownames(concat_prediction_valid)<-rownames(fulldat_valid)
}
res=list()
if (!is.null(sample_metadata_valid)){
Y_test=validY$Y
res$Y_test =Y_test
}
if(fit$run_concat & fit$run_stacked){
yhat.test <- cbind(combo_valid, stacked_prediction_valid , concat_prediction_valid)
colnames(yhat.test) <- c(colnames(combo_valid),"stacked","concatenated")
}else if(fit$run_concat & !fit$run_stacked){
yhat.test <- cbind(combo_valid, concat_prediction_valid)
colnames(yhat.test) <- c(colnames(combo_valid),"concatenated")
}else if(!fit$run_concat & fit$run_stacked){
yhat.test <- cbind(combo_valid, stacked_prediction_valid )
colnames(yhat.test) <- c(colnames(combo_valid),"stacked")
}else{
yhat.test <- combo_valid
}
res$yhat.test <- yhat.test
if (!is.null(sample_metadata_valid)){
if(fit$family=='binomial'){
# Calculate AUC for each layer, stacked and concatenated
pred=apply(res$yhat.test, 2, ROCR::prediction, labels=res$Y_test)
AUC=vector(length = length(pred))
names(AUC)=names(pred)
for(i in seq_along(pred)){
AUC[i] = round(ROCR::performance(pred[[i]], "auc")@y.values[[1]], 3)
}
res$AUC.test <- AUC
}
if(fit$family=='gaussian'){
# Calculate R^2 for each layer, stacked and concatenated
R2=vector(length = ncol(res$yhat.test))
names(R2)=names(res$yhat.test)
for(i in seq_along(R2)){
R2[i] = as.vector(cor(res$yhat.test[ ,i], res$Y_test)^2)
}
res$R2.test <- R2
}
}
return(res)
}