# overview/cohort-tables-full.R
# Location of the cohort CSV, relative to the working directory the script is
# run from.
data.filename <- '../../data/cohort-sanitised.csv'

# library() stops with an error if data.table is not installed; require()
# would only return FALSE and let the fread() call below fail instead.
library(data.table)

# Read the whole cohort into a data.table.
COHORT <- fread(data.filename)
# Percentage of the entries of x that are NA.
#
# x:  vector to inspect.
# sf: passed to round() as `digits` and applied to the missing *proportion*
#     before it is scaled to a percentage, so the default of 3 gives a
#     percentage with one decimal place.
#
# Returns a single number between 0 and 100; NaN if x has length zero (0/0).
percentMissing <- function(x, sf = 3) {
  round(sum(is.na(x)) / length(x), digits = sf) * 100
}
# Remove the patients we shouldn't include.
# Keep a row only when BOTH conditions hold:
#   * time_death is strictly positive (zero as well as negative times to
#     death are dropped);
#   * the patient is not flagged in the exclude column.
# Column names are evaluated inside COHORT (data.table syntax); rows for which
# the condition evaluates to NA are dropped as well.
COHORT <- COHORT[time_death > 0 & !exclude]
# Age: median, 2.5th and 97.5th percentiles.
# No na.rm here, so quantile() stops with an error if age contains NA.
print(quantile(COHORT$age, c(0.5, 0.025, 0.975)))

# Gender: counts, then percentage of the cohort
print(table(COHORT$gender))
print(table(COHORT$gender) / nrow(COHORT) * 100)

# Deprivation: median, 2.5th and 97.5th percentiles, then % missing
print(quantile(COHORT$imd_score, c(0.5, 0.025, 0.975), na.rm = TRUE))
print(percentMissing(COHORT$imd_score))

# SCAD subtype, % of cohort
print(table(COHORT$diagnosis) / nrow(COHORT) * 100)

# PCI, % of cohort
print(sum(COHORT$pci_6mo) / nrow(COHORT) * 100)

# CABG, % of cohort
print(sum(COHORT$cabg_6mo) / nrow(COHORT) * 100)

# previous/recurrent MI, % of cohort
print(sum(COHORT$hx_mi) / nrow(COHORT) * 100)

# nitrates (listed as 1 and NA, not TRUE and FALSE, hence na.rm = TRUE)
print(sum(COHORT$long_nitrate, na.rm = TRUE) / nrow(COHORT) * 100)

# Smoking: % of cohort in each category, then % missing.
# The denominator is the whole cohort, so the category percentages do not sum
# to 100 when some values are missing.
print(table(COHORT$smokstatus) / nrow(COHORT) * 100)
print(percentMissing(COHORT$smokstatus))

# Hypertension, % of cohort
print(sum(COHORT$hypertension) / nrow(COHORT) * 100)

# Diabetes, yes/no: % of cohort with any of the three diabetes labels.
# %in% never returns NA, so a missing diabetes value counts as "no" rather
# than turning the whole percentage into NA.
print(
  sum(
    COHORT$diabetes %in%
      c('Diabetes unspecified type', 'Type 1 diabetes', 'Type 2 diabetes')
  ) / nrow(COHORT) * 100
)
# For each continuous measurement below: median, 2.5th and 97.5th percentiles
# of the non-missing values, followed by the percentage missing.
# For each binary history flag: percentage of the cohort with the flag set
# (no na.rm, so an NA in the column makes the printed value NA).

# Total cholesterol
print(quantile(COHORT$total_chol_6mo, c(0.5, 0.025, 0.975), na.rm = TRUE))
print(percentMissing(COHORT$total_chol_6mo))

# HDL
print(quantile(COHORT$hdl_6mo, c(0.5, 0.025, 0.975), na.rm = TRUE))
print(percentMissing(COHORT$hdl_6mo))

# Heart failure
print(sum(COHORT$heart_failure) / nrow(COHORT) * 100)

# Peripheral arterial disease
print(sum(COHORT$pad) / nrow(COHORT) * 100)

# Atrial fibrillation
print(sum(COHORT$hx_af) / nrow(COHORT) * 100)

# Stroke
print(sum(COHORT$hx_stroke) / nrow(COHORT) * 100)

# Chronic kidney disease
print(sum(COHORT$hx_renal) / nrow(COHORT) * 100)

# COPD
print(sum(COHORT$hx_copd) / nrow(COHORT) * 100)

# Cancer
print(sum(COHORT$hx_cancer) / nrow(COHORT) * 100)

# Chronic liver disease
print(sum(COHORT$hx_liver) / nrow(COHORT) * 100)

# Depression
print(sum(COHORT$hx_depression) / nrow(COHORT) * 100)

# Anxiety
print(sum(COHORT$hx_anxiety) / nrow(COHORT) * 100)

# Heart rate
print(quantile(COHORT$pulse_6mo, c(0.5, 0.025, 0.975), na.rm = TRUE))
print(percentMissing(COHORT$pulse_6mo))

# Creatinine
print(quantile(COHORT$crea_6mo, c(0.5, 0.025, 0.975), na.rm = TRUE))
print(percentMissing(COHORT$crea_6mo))

# WCC
print(quantile(COHORT$total_wbc_6mo, c(0.5, 0.025, 0.975), na.rm = TRUE))
print(percentMissing(COHORT$total_wbc_6mo))

# Haemoglobin
print(quantile(COHORT$haemoglobin_6mo, c(0.5, 0.025, 0.975), na.rm = TRUE))
print(percentMissing(COHORT$haemoglobin_6mo))
# Follow-up: median, 2.5th and 97.5th percentiles, divided by 365.25.
# The division has to happen inside print(); outside it, print() shows the
# undivided quantiles and the scaled result is only ever auto-printed.
# NOTE(review): the column is called endpoint_death_date but is divided by
# 365.25 as if it held a duration in days -- confirm this is the intended
# column and unit.
print(quantile(COHORT$endpoint_death_date, c(0.5, 0.025, 0.975)) / 365.25)

# Death vs censored: counts, then percentage of the cohort.
# The percentage arithmetic is done inside print() so the percentages are
# always shown, not just the raw counts.
print(table(COHORT$endpoint_death))
print(table(COHORT$endpoint_death) / nrow(COHORT) * 100)