Diff of /analysis/GB_surface.Rmd [000000] .. [9905a0]

Switch to unified view

a b/analysis/GB_surface.Rmd
1
---
2
title: "Run GB on Combined T cells - surface"
3
author: Tobias Roider
4
date: "Last compiled on `r format(Sys.time(), '%d %B, %Y, %X')`"
5
output: 
6
  rmdformats::readthedown: 
7
  
8
editor_options: 
9
  chunk_output_type: console
10
---
11
12
```{r options, include=FALSE, warning = FALSE}
13
14
library(knitr)
15
opts_chunk$set(echo=TRUE, tidy=FALSE, include=TRUE, message=FALSE,
16
               dpi = 100, cache = FALSE, warning = FALSE)
17
opts_knit$set(root.dir = "../")
18
options(bitmapType = "cairo")
19
20
```
21
22
# Load packages
23
```{r Load packages and functions}
24
25
library(Seurat)
26
library(tidyverse)
27
library(caret)
28
library(rsample)
29
source("R/Functions.R")
30
31
```
32
33
# Read data
34
```{r read data, fig.width=4, fig.height=4}
35
36
sobj <- readRDS("output/Tcells_Integrated.rds")
37
sobj$IdentI <- factor(sobj$IdentI, levels = sobj$IdentI %>% unique %>% sort())
38
39
Idents(sobj) <- "IdentI"
40
DimPlot(sobj, label = T, reduction = "wnn.umap", raster = TRUE, 
41
        raster.dpi = c(500,500), group.by = "IdentI")+
42
  NoLegend()
43
44
```
45
46
# Split data using rsample package
47
```{r split data}
48
49
set.seed(1)
50
split <- sobj@assays$integratedADT@scale.data %>% t %>% data.frame() %>% 
51
  cbind(., FetchData(sobj, vars = c("IdentI"))) %>% 
52
  select(-IdentI) %>% 
53
  initial_split(prop = 0.35)
54
55
train <- training(split) %>% as.matrix()
56
test <- testing(split) %>% as.matrix()
57
  
58
classes_split <- initial_split(as_tibble(Idents(sobj)), prop = 0.35)
59
classes_split[["in_id"]] <- split[["in_id"]] # Ensures corresponding celltypes
60
classes_training <- training(classes_split) %>% .$value %>% as.factor()
61
classes_test  <- testing(classes_split) %>% .$value %>% as.factor()
62
  
63
# Ensure all classes are represented
64
all(levels(classes_training) == levels(classes_test))
65
66
```
67
68
# Train model
69
```{r train model, message=FALSE, warning=FALSE, results='hide'}
70
71
trControl <- trainControl(method = "cv", number = 10, allowParallel = TRUE,
72
                          search = "random", verboseIter = FALSE)
73
  
74
set.seed(23)
75
gb_fit <- train(x = train,
76
                y = classes_training,
77
                method = "xgbTree",
78
                importance = TRUE,
79
                metric = "Accuracy",
80
                maximize = TRUE,
81
                trControl = trControl)
82
83
```
84
85
# Print model
86
```{r print model}
87
88
print(gb_fit)
89
90
```
91
92
# Predict
93
```{r predict model}
94
95
predict_test <- predict(gb_fit, newdata = test)
96
97
confusionMatrix(predict_test, classes_test)
98
99
```
100
101
# Store classes
102
```{r store classes}
103
104
names(classes_training) <- rownames(train)
105
names(classes_test) <- rownames(test)
106
names(predict_test) <- rownames(test)
107
classes <- list(classes_training, classes_test, predict_test)
108
names(classes) <- c("train", "test", "predict")
109
110
```
111
112
# Show most important markers
113
```{r important markers}
114
115
varImp(gb_fit, scale = TRUE, top=40)
116
117
```
118
119
# Save objects
120
```{r save}
121
122
saveRDS(gb_fit, file = "output/GBresults_Combined_T_surface.rds")
123
saveRDS(classes, file = "output/GBclasses_Combined_T_surface.rds")
124
125
```
126
127
# Session info
128
```{r session info}
129
130
sessionInfo()
131
132
```