[4ef077]: / Predict ICU patient admission.R

Download this file

148 lines (116 with data), 5.4 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# Predicting ICU patient admission
#install.packages("caret")
#install.packages("pscl")
#install.packages("Hmisc")
#install.packages("prodlim")
library(readxl)
library("caret") #helps us to split our data into training and testing sets
library(pscl) #gets the pseudo R-square for logistic regression
library(Hmisc) #used to find the correlation and its p-values
### Step 1: Import & summarize the data ####
hospital_df <- read_excel(file.choose())
head(hospital_df)
summary(hospital_df)
### Step 2: Data visualization ####
# Visualizing the target variable
counts <- table(hospital_df$ICU_Admit)
barplot(counts)
### Step 3: Feature engineering: Data pre-processing ####
colSums(is.na(hospital_df)) #counting the number of missing values
hosp_df <- na.omit(hospital_df) #omit all the rows with missing values
summary(hosp_df)
hosp_df$Sex <- ifelse(hosp_df$Sex == 'Male', 1, 0)
head(hosp_df) #Outputs a snapshot of our new columns
### Step 4: Feature selection - selecting the contributing variables to our model ####
corr <- rcorr(as.matrix(hosp_df))
corr #Outputs the correlation results
# Drop all the columns with p-value > 0.05
hosp_df2 <- subset(hosp_df, select = -c(SBP, HR, BMI, CHF, Cancer,
LowIncome, obese))
hosp_df2
### Step 5: Model building ####
# Splitting the data into training and testing sets
set.seed(3456)
trainIndex <- createDataPartition(hosp_df2$ICU_Admit, p = 0.70, list = FALSE, times = 1)
Train <- hosp_df2[ trainIndex,]
Test <- hosp_df2[-trainIndex,]
# First model with all the variables
model <- glm(ICU_Admit ~ ESI + Age + Sex + Temp + RR + Spo2 + qSOFA + MI + Stroke + DM + CKD + Asthma + HTN,
data = Train, family = binomial)
summary(model)
# Results - Age, Sex, MI, Stroke, CKD, Asthma, and HTN are insignificant variables (p > 0.05).
# Remove insignificant variables and rebuild the model
model2 <- glm(ICU_Admit ~ ESI + Temp + RR + Spo2 + qSOFA + DM,
data = Train, family = binomial)
summary(model2)
# Result Interpretation
# Respiratory rate (RR)
# for every one unit change in patient RR, the log odds of ICU admission (versus non-admission) increases by 0.04.
# Diabetes Mellitus (DM)
# a patient diagnosed with diabetes 1, versus no diabetes 0, changes the log odds of ICU admission by 0.95.
### Step 6: Evaluating model performance ####
#### McFadden's R-square (i.e., Pseudo R-square) ####
pR2(model2) #McFadden R-square = 0.27
# Predicting on the test data
fitted.results <- predict(model2, Test, type='response')
# We need to show the probability of patients getting admitted or not
probability <- data.frame(prob = as.data.frame(fitted.results), actual = Test$ICU_Admit)
# Creating a threshold for our probability values. Default threshold is 0.5
fitted.results <- ifelse(fitted.results > 0.5,1,0)
#### Accuracy as a performance measure ####
misClasificError <- mean(fitted.results != Test$ICU_Admit) #misclassification error
print(paste('Accuracy',1-misClasificError))
# count the number of admitted vs not admitted patients in the test data
counts1 <- table(Test$ICU_Admit)
counts1
# using confusion matrix
table(Test$ICU_Admit, fitted.results)
# Interpretation
# TN: 299 patients were correctly predicted as not admitted to the ICU (pred = 0, actual = 0)
# FP: 4 patients were incorrectly predicted as admitted to the ICU (pred = 1, actual = 0)
# FN: 31 patients were incorrectly predicted as not admitted to the ICU (pred = 0, actual = 1)
# TP: 11 patients were correctly predicted as admitted to the ICU (pred = 1, actual = 1)
##### R Shiny - App Development ####
# Load necessary packages
library(shiny)
library(readxl)
# Define UI for the application
ui <- fluidPage(
titlePanel("ICU Admission Prediction"),
sidebarLayout(
sidebarPanel(
numericInput("esi", "Emergency Severity Index", value = 2, min = 1, max = 3),
numericInput("temp", "Temperature (°F)", value = 92, min = 90, max = 105),
numericInput("rr", "Respiratory Rate (breaths/min)", value = 20, min = 10, max = 135),
numericInput("spo2", "Percent Oxygen (SpO2 %)", value = 97, min = 0, max = 100),
numericInput("qsofa", "qSOFA Score", value = 0, min = 0, max = 3),
checkboxInput("dm", "Diabetes Mellitus", value = FALSE),
actionButton("predict", "Predict ICU Admission")
),
mainPanel(
textOutput("result")
)
)
)
# Define server logic
server <- function(input, output) {
# Load the dataset
hospital_df <- read_excel(file.choose())
# Preprocess the data and build the model as in your script
# ...
model2 <- glm(ICU_Admit ~ ESI + Temp + RR + Spo2 + qSOFA + DM, data = hospital_df, family = binomial)
# Reactively predict ICU Admission based on user inputs
observeEvent(input$predict, {
new_data <- data.frame(ESI = as.numeric(input$esi),
Temp = as.numeric(input$temp),
RR = as.numeric(input$rr),
Spo2 = as.numeric(input$spo2),
qSOFA = as.numeric(input$qsofa),
DM = as.integer(input$dm))
pred <- predict(model2, new_data, type = "response")
icu_admission_prob <- round(pred * 100, 2)
output$result <- renderText(paste("Probability of ICU Admission:", icu_admission_prob, "%"))
})
}
# Run the application
shinyApp(ui = ui, server = server)