[0375db]: / overview / performance-differences.R

Download this file

116 lines (98 with data), 3.2 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
source('../lib/handymedical.R', chdir = TRUE)
# Directory that holds the bootstrap result files named below
bootstrap.base <- '../../output'

# Bootstrap result files to compare, keyed by a short model label. The labels
# are what appear in the model.1 and model.2 columns of the output table; the
# file extension decides how each file is read further down.
bootstrap.files <- c(
  cox.miss =
    'caliber-replicate-with-missing-survreg-6-linear-age-surv-boot.rds',
  cox.disc = 'all-cv-survreg-boot-try5-surv-model.rds',
  cox.imp  = 'caliber-replicate-imputed-survreg-4-surv-boot-imp.rds',
  rf       = 'rfsrc-cv-nsplit-try3-boot-all.csv',
  rf.imp   = 'rf-imputed-try1-boot.rds',
  rfbig    = 'rf-bigdata-varsellogrank-02-boot-all.csv',
  coxbig   = 'cox-bigdata-varsellogrank-01-boot-all.csv'
)
# Helper functions
# Convert a boot object into a data frame: the contents of x$t become the
# columns, labelled with the names of x$t0.
bootstrap2Df <- function(x) {
  setNames(data.frame(x$t), names(x$t0))
}
# Make sure calibration scores are bigger = better.
#
# x: numeric vector of calibration scores.
# Returns 1 - x when the mean of x is below 0.5, otherwise x unchanged.
# Missing values are ignored when computing the mean and are passed through
# in the result; empty or all-missing input is returned as is.
calibrationFix <- function(x) {
  x.mean <- mean(x, na.rm = TRUE)
  # With nothing left to average the mean is undefined, and if() would stop
  # on a missing condition, so there is nothing to re-orient.
  if (is.na(x.mean)) {
    return(x)
  }
  if (x.mean < 0.5) {
    x <- 1 - x
  }
  x
}
# Load every bootstrap result file into a list of data frames, kept in the
# same order as bootstrap.files because later code indexes it by position.
n <- length(bootstrap.files)
bootstraps <- vector('list', n)
for (i in seq_len(n)) {
  boot.path <- file.path(bootstrap.base, bootstrap.files[i])
  if (fileExt(bootstrap.files[i]) == 'rds') {
    boot.obj <- readRDS(boot.path)
    # inherits() rather than class(...) == 'list': an object with several
    # classes would make the comparison longer than one element, which if()
    # rejects. A classed object (e.g. a boot object) does not inherit from
    # 'list', so it still takes the else branch.
    if (inherits(boot.obj, 'list')) {
      # If it's a list, then it's from an imputed dataset with separate
      # bootstraps. Turn each of these into a data frame and then combine them
      # together.
      # (data.frame is needed because rbindlist returns a data.table)
      bootstraps[[i]] <-
        data.frame(rbindlist(lapply(boot.obj, bootstrap2Df)))
    } else {
      bootstraps[[i]] <- bootstrap2Df(boot.obj)
    }
  } else {
    # Anything that is not .rds is read as CSV
    bootstraps[[i]] <- read.csv(boot.path)
  }
}
# Compare every pair of models. Each pair contributes two rows to
# bootstrap.differences: one for the C-index and one for the calibration score.

# Return the one column of df whose name is in candidates. Stops with a clear
# message unless exactly one column matches, so that an empty or multi-column
# selection is never handed on to bootstrapDiff.
selectBootColumn <- function(df, candidates, label) {
  hits <- which(names(df) %in% candidates)
  if (length(hits) != 1) {
    stop(
      'Expected exactly one column named ',
      paste(candidates, collapse = ' or '),
      ' in the bootstrap results for ', label,
      ', found ', length(hits),
      call. = FALSE
    )
  }
  df[[hits]]
}

# Build one row of the output table from a bootstrapDiff result.
# NOTE(review): boot.diff is assumed to be a named vector with elements
# 'val', 'lower' and 'upper'; confirm against bootstrapDiff.
bootDiffRow <- function(model.1, model.2, var, boot.diff) {
  data.frame(
    model.1 = model.1,
    model.2 = model.2,
    var = var,
    diff = boot.diff['val'],
    lower = boot.diff['lower'],
    upper = boot.diff['upper']
  )
}

x1x2 <- combn(seq_len(n), 2)
x1 <- x1x2[1, ]
x2 <- x1x2[2, ]

# Collect the rows per pair and bind once at the end instead of growing the
# data frame inside the loop.
pair.rows <- vector('list', length(x1))
for (i in seq_along(x1)) {
  name.1 <- names(bootstrap.files)[x1[i]]
  name.2 <- names(bootstrap.files)[x2[i]]
  boot.1 <- bootstraps[[x1[i]]]
  boot.2 <- bootstraps[[x2[i]]]

  # C-index (the column is called either c.test or c.index depending on the
  # file it came from)
  c.index.diff <-
    bootstrapDiff(
      selectBootColumn(boot.1, c('c.test', 'c.index'), name.1),
      selectBootColumn(boot.2, c('c.test', 'c.index'), name.2)
    )

  # Calibration score, oriented so that bigger = better before comparing
  calibration.diff <-
    bootstrapDiff(
      calibrationFix(selectBootColumn(boot.1, 'calibration.score', name.1)),
      calibrationFix(selectBootColumn(boot.2, 'calibration.score', name.2))
    )

  pair.rows[[i]] <-
    rbind(
      bootDiffRow(name.1, name.2, 'c.index', c.index.diff),
      bootDiffRow(name.1, name.2, 'calibration.score', calibration.diff)
    )
}
bootstrap.differences <- do.call(rbind, pair.rows)
# Remove nonsense row names
rownames(bootstrap.differences) <- NULL

# Print the table with the estimates rounded to 3 decimal places; the CSV
# written below keeps the unrounded values.
print(
  cbind(
    bootstrap.differences[, c('model.1', 'model.2', 'var')],
    round(bootstrap.differences[, c('diff', 'lower', 'upper')], 3)
  )
)

# Write next to the input files, reusing bootstrap.base so that the output
# directory is only defined in one place.
write.csv(
  bootstrap.differences,
  file.path(bootstrap.base, 'bootstrap-differences.csv')
)