[0375db]: / age-only / age-only.R

Download this file

136 lines (111 with data), 3.7 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#+ knitr_setup, include = FALSE
# Whether to cache the intensive code sections. Set to FALSE to recalculate
# everything afresh.
cacheoption <- FALSE
# Disable lazy caching globally, because it fails for large objects, and all the
# objects we wish to cache are large...
opts_chunk$set(cache.lazy = FALSE)
#' # Performance of an age-only model
#'
#+ setup, message=FALSE
bootstraps <- 200
data.filename <- '../../data/cohort-sanitised.csv'
n.threads <- 16
source('../lib/shared.R')
require(rms)
COHORT.full <- data.frame(fread(data.filename))
COHORT.use <- subset(COHORT.full, !exclude)
n.data <- nrow(COHORT.use)
# Define indices of test set
test.set <- testSetIndices(COHORT.use, random.seed = 78361)
COHORT.use <- prepSurvCol(COHORT.use, surv.time, surv.event,surv.event.yes)
#' ## Concordance index
#'
#' The c-index can be calculated taking age itself as a risk score. Since it's
#' purely rank-based, it doesn't matter that age is nonlinearly related to true
#' risk.
#'
#' We bootstrap this based purely on the test set to make the bootstrap
#' variability commensurate with other models tested on the test set. You can
#' imagine that this model was 'trained' on the training set, even though it's
#' so simple that we actually did no such thing...
#'
#+ c_index
# Define a trivial function used for bootstrapping which simply returns the
# c-index of a 'model' based purely on age.
ageOnly <- function(df, indices, df.test) {
# Create a Kaplan-Meier curve from the bootstrap sample
km.by.age <-
survfit(
Surv(surv_time, surv_event) ~ age,
data = df[indices, ],
conf.type = "log-log"
)
# Return the calibration score
c(
calibration.score =
calibrationScore(
calibrationTable(km.by.age, df.test)
)$area
)
}
# C-index is by definition non-variable because we do not bootstrap or otherwise
# permute the test set, and there's no 'training' phase
age.c.index <-
as.numeric(
survConcordance(
Surv(surv_time, surv_event) ~ age,
COHORT.use[test.set,]
)$concordance
)
# Bootstrap to establish variability of calibration
age.only.boot <-
boot(
data = COHORT.use[-test.set,],
statistic = ageOnly,
R = bootstraps,
parallel = 'multicore',
ncpus = n.threads,
df.test = COHORT.use[test.set,]
)
age.only.boot.stats <- bootStats(age.only.boot, uncertainty = '95ci')
#' C-index is
#' **`r round(age.c.index, 3)`**
#' on the held-out test set (not that it really matters, the model isn't
#' 'trained' as such for the discrimination test...it's just oldest patient dies
#' first).
#'
#'
#' ## Calibration
#'
#'
#+ calibration
km.by.age <-
survfit(
Surv(surv_time, surv_event) ~ age,
data = COHORT.use[-test.set,],
conf.type = "log-log"
)
calibration.table <- calibrationTable(km.by.age, COHORT.use[test.set, ])
print(calibrationScore(calibration.table))
calibrationPlot(calibration.table)
#' Calibration score is
#' **`r round(age.only.boot.stats['calibration.score', 'val'], 3)`
#' (`r round(age.only.boot.stats['calibration.score', 'lower'], 3)` -
#' `r round(age.only.boot.stats['calibration.score', 'upper'], 3)`)**
#' on the held-out test set.
varsToTable(
data.frame(
model = 'age',
imputation = FALSE,
discretised = FALSE,
c.index = age.c.index,
c.index.lower = NA,
c.index.upper = NA,
calibration.score = age.only.boot.stats['calibration.score', 'val'],
calibration.score.lower = age.only.boot.stats['calibration.score', 'lower'],
calibration.score.upper = age.only.boot.stats['calibration.score', 'upper']
),
performance.file,
index.cols = c('model', 'imputation', 'discretised')
)