Switch to unified view

a b/overview/performance-differences.R
1
source('../lib/handymedical.R', chdir = TRUE)
2
3
bootstrap.base <- '../../output'
4
5
bootstrap.files <-
6
  c(
7
    cox.miss = 'caliber-replicate-with-missing-survreg-6-linear-age-surv-boot.rds',
8
    cox.disc = 'all-cv-survreg-boot-try5-surv-model.rds',
9
    cox.imp = 'caliber-replicate-imputed-survreg-4-surv-boot-imp.rds',
10
    rf = 'rfsrc-cv-nsplit-try3-boot-all.csv',
11
    rf.imp = 'rf-imputed-try1-boot.rds',
12
    rfbig = 'rf-bigdata-varsellogrank-02-boot-all.csv',
13
    coxbig = 'cox-bigdata-varsellogrank-01-boot-all.csv'
14
  )
15
16
# Helper functions
17
18
# Turn a boot object into a data frame
19
bootstrap2Df <- function(x) {
20
  df <- data.frame(x$t)
21
  names(df) <- names(x$t0)
22
  df
23
}
24
25
# Make sure calibration scores are bigger = better
26
calibrationFix <- function(x) {
27
  if(mean(x) < 0.5) {
28
    x <- 1 - x
29
  }
30
  x
31
}
32
33
n <- length(bootstrap.files)
34
35
bootstraps <- list()
36
37
for(i in 1:n) {
38
  if(fileExt(bootstrap.files[i]) == 'rds'){
39
    bootstraps[[i]] <- readRDS(file.path(bootstrap.base, bootstrap.files[i]))
40
    
41
    if(class(bootstraps[[i]]) == 'list') {
42
      # If it's a list, then it's from an imputed dataset with separate bootstraps
43
      # Turn each of these into a data frame and then combine them together.
44
      # (data.frame is needed because rbindlist returns a data.table)
45
      bootstraps[[i]] <-
46
        data.frame(rbindlist(lapply(bootstraps[[i]], bootstrap2Df)))
47
    } else {
48
      bootstraps[[i]] <- bootstrap2Df(bootstraps[[i]] )
49
    }
50
  } else{
51
    bootstraps[[i]] <- read.csv(file.path(bootstrap.base, bootstrap.files[i]))
52
  }
53
}
54
55
x1x2 <- combn(1:n, 2)
56
x1 <- x1x2[1,]
57
x2 <- x1x2[2,]
58
59
60
bootstrap.differences <- data.frame()
61
for(i in 1:length(x1)) {
62
  # C-index
63
  col.1.c.index <-
64
    which(names(bootstraps[[x1[i]]]) %in% c('c.test', 'c.index'))
65
  col.2.c.index <-
66
    which(names(bootstraps[[x2[i]]]) %in% c('c.test', 'c.index'))
67
  boot.diff <-
68
    bootstrapDiff(
69
      bootstraps[[x1[i]]][, col.1.c.index],
70
      bootstraps[[x2[i]]][, col.2.c.index]
71
    )
72
  
73
  bootstrap.differences <-
74
    rbind(
75
      bootstrap.differences,
76
      data.frame(
77
        model.1 = names(bootstrap.files)[x1[i]],
78
        model.2 = names(bootstrap.files)[x2[i]],
79
        var = 'c.index',
80
        diff = boot.diff['val'],
81
        lower = boot.diff['lower'],
82
        upper = boot.diff['upper']
83
      )
84
    )
85
  
86
  # Calibration score
87
  col.1.calib <-
88
    which(names(bootstraps[[x1[i]]]) == 'calibration.score')
89
  col.2.calib <-
90
    which(names(bootstraps[[x2[i]]]) == 'calibration.score')
91
  boot.diff <-
92
    bootstrapDiff(
93
      calibrationFix(bootstraps[[x1[i]]][, col.1.calib]),
94
      calibrationFix(bootstraps[[x2[i]]][, col.2.calib])
95
    )
96
  
97
  bootstrap.differences <-
98
    rbind(
99
      bootstrap.differences,
100
      data.frame(
101
        model.1 = names(bootstrap.files)[x1[i]],
102
        model.2 = names(bootstrap.files)[x2[i]],
103
        var = 'calibration.score',
104
        diff = boot.diff['val'],
105
        lower = boot.diff['lower'],
106
        upper = boot.diff['upper']
107
      )
108
    )
109
}
110
111
# Remove nonsense row names
112
rownames(bootstrap.differences) <- NULL
113
114
print(cbind(bootstrap.differences[, c('model.1', 'model.2', 'var')], round(bootstrap.differences[, 4:6], 3)))
115
116
write.csv(bootstrap.differences, '../../output/bootstrap-differences.csv')