MLehealth / Git / Diff of /overview/performance-differences.R

Models:
philipB/
MLehealth
Downloads: 1
Diff of /overview/performance-differences.R [000000] .. [0375db]
Switch to side-by-side view

--- a
+++ b/overview/performance-differences.R
@@ -0,0 +1,116 @@
+source('../lib/handymedical.R', chdir = TRUE)
+
+bootstrap.base <- '../../output'
+
+bootstrap.files <-
+  c(
+    cox.miss = 'caliber-replicate-with-missing-survreg-6-linear-age-surv-boot.rds',
+    cox.disc = 'all-cv-survreg-boot-try5-surv-model.rds',
+    cox.imp = 'caliber-replicate-imputed-survreg-4-surv-boot-imp.rds',
+    rf = 'rfsrc-cv-nsplit-try3-boot-all.csv',
+    rf.imp = 'rf-imputed-try1-boot.rds',
+    rfbig = 'rf-bigdata-varsellogrank-02-boot-all.csv',
+    coxbig = 'cox-bigdata-varsellogrank-01-boot-all.csv'
+  )
+
+# Helper functions
+
+# Turn a boot object into a data frame
+bootstrap2Df <- function(x) {
+  df <- data.frame(x$t)
+  names(df) <- names(x$t0)
+  df
+}
+
+# Make sure calibration scores are bigger = better
+calibrationFix <- function(x) {
+  if(mean(x) < 0.5) {
+    x <- 1 - x
+  }
+  x
+}
+
+n <- length(bootstrap.files)
+
+bootstraps <- list()
+
+for(i in 1:n) {
+  if(fileExt(bootstrap.files[i]) == 'rds'){
+    bootstraps[[i]] <- readRDS(file.path(bootstrap.base, bootstrap.files[i]))
+    
+    if(class(bootstraps[[i]]) == 'list') {
+      # If it's a list, then it's from an imputed dataset with separate bootstraps
+      # Turn each of these into a data frame and then combine them together.
+      # (data.frame is needed because rbindlist returns a data.table)
+      bootstraps[[i]] <-
+        data.frame(rbindlist(lapply(bootstraps[[i]], bootstrap2Df)))
+    } else {
+      bootstraps[[i]] <- bootstrap2Df(bootstraps[[i]] )
+    }
+  } else{
+    bootstraps[[i]] <- read.csv(file.path(bootstrap.base, bootstrap.files[i]))
+  }
+}
+
+x1x2 <- combn(1:n, 2)
+x1 <- x1x2[1,]
+x2 <- x1x2[2,]
+
+
+bootstrap.differences <- data.frame()
+for(i in 1:length(x1)) {
+  # C-index
+  col.1.c.index <-
+    which(names(bootstraps[[x1[i]]]) %in% c('c.test', 'c.index'))
+  col.2.c.index <-
+    which(names(bootstraps[[x2[i]]]) %in% c('c.test', 'c.index'))
+  boot.diff <-
+    bootstrapDiff(
+      bootstraps[[x1[i]]][, col.1.c.index],
+      bootstraps[[x2[i]]][, col.2.c.index]
+    )
+  
+  bootstrap.differences <-
+    rbind(
+      bootstrap.differences,
+      data.frame(
+        model.1 = names(bootstrap.files)[x1[i]],
+        model.2 = names(bootstrap.files)[x2[i]],
+        var = 'c.index',
+        diff = boot.diff['val'],
+        lower = boot.diff['lower'],
+        upper = boot.diff['upper']
+      )
+    )
+  
+  # Calibration score
+  col.1.calib <-
+    which(names(bootstraps[[x1[i]]]) == 'calibration.score')
+  col.2.calib <-
+    which(names(bootstraps[[x2[i]]]) == 'calibration.score')
+  boot.diff <-
+    bootstrapDiff(
+      calibrationFix(bootstraps[[x1[i]]][, col.1.calib]),
+      calibrationFix(bootstraps[[x2[i]]][, col.2.calib])
+    )
+  
+  bootstrap.differences <-
+    rbind(
+      bootstrap.differences,
+      data.frame(
+        model.1 = names(bootstrap.files)[x1[i]],
+        model.2 = names(bootstrap.files)[x2[i]],
+        var = 'calibration.score',
+        diff = boot.diff['val'],
+        lower = boot.diff['lower'],
+        upper = boot.diff['upper']
+      )
+    )
+}
+
+# Remove nonsense row names
+rownames(bootstrap.differences) <- NULL
+
+print(cbind(bootstrap.differences[, c('model.1', 'model.2', 'var')], round(bootstrap.differences[, 4:6], 3)))
+
+write.csv(bootstrap.differences, '../../output/bootstrap-differences.csv')
\ No newline at end of file