--- a +++ b/analysis/GB_surfaceplus.Rmd @@ -0,0 +1,132 @@ +--- +title: "Run GB on Combined T cells - surfaceplus" +author: Tobias Roider +date: "Last compiled on `r format(Sys.time(), '%d %B, %Y, %X')`" +output: + rmdformats::readthedown: + +editor_options: + chunk_output_type: console +--- + +```{r options, include=FALSE, warning = FALSE} + +library(knitr) +opts_chunk$set(echo=TRUE, tidy=FALSE, include=TRUE, message=FALSE, + dpi = 100, cache = FALSE, warning = FALSE) +opts_knit$set(root.dir = "../") +options(bitmapType = "cairo") + +``` + +# Load packages +```{r Load packages and functions} + +library(Seurat) +library(tidyverse) +library(caret) +library(rsample) +source("R/Functions.R") + +``` + +# Read data +```{r read data, fig.width=4, fig.height=4} + +sobj <- readRDS("output/Tcells_Integrated.rds") +sobj$IdentI <- factor(sobj$IdentI, levels = sobj$IdentI %>% unique %>% sort()) + +Idents(sobj) <- "IdentI" +DimPlot(sobj, label = T, reduction = "wnn.umap", raster = TRUE, + raster.dpi = c(500,500), group.by = "IdentI")+ + NoLegend() + +``` + +# Split data using rsample package +```{r split data} + +set.seed(1) +split <- sobj@assays$integratedADT@scale.data %>% t %>% data.frame() %>% + cbind(., FetchData(sobj, vars = c("integratedrna_MKI67", "integratedrna_IKZF3", "tfactivity_FOXP3-E", "IdentI"))) %>% + select(-IdentI) %>% + initial_split(prop = 0.35) + +train <- training(split) %>% as.matrix() +test <- testing(split) %>% as.matrix() + +classes_split <- initial_split(as_tibble(Idents(sobj)), prop = 0.35) +classes_split[["in_id"]] <- split[["in_id"]] # Ensures corresponding celltypes +classes_training <- training(classes_split) %>% .$value %>% as.factor() +classes_test <- testing(classes_split) %>% .$value %>% as.factor() + +# Ensure all classes are represented +all(levels(classes_training) == levels(classes_test)) + +``` + +# Train model +```{r train model, message=FALSE, warning=FALSE, results='hide'} + +trControl <- trainControl(method = "cv", number = 10, search = "random", + verboseIter = FALSE, allowParallel = TRUE) + +set.seed(23) +gb_fit <- train(x = train, + y = classes_training, + method = "xgbTree", + importance = TRUE, + metric = "Accuracy", + maximize = TRUE, + trControl = trControl) + +``` + +# Print model +```{r print model} + +print(gb_fit) + +``` + +# Predict +```{r predict} + +predict_test <- predict(gb_fit, newdata = test) + +confusionMatrix(predict_test, classes_test) + +``` + +# Store classes +```{r store} + +names(classes_training) <- rownames(train) +names(classes_test) <- rownames(test) +names(predict_test) <- rownames(test) +classes <- list(classes_training, classes_test, predict_test) +names(classes) <- c("train", "test", "predict") + +``` + +# Show most important markers +```{r show most important markers} + +varImp(gb_fit, scale = TRUE, top=40) + +``` + +# Save objects +```{r save} + +saveRDS(gb_fit, file = "output/GBresults_Combined_T_surfaceplus.rds") +saveRDS(classes, file = "output/GBclasses_Combined_T_surfaceplus.rds") + +``` + +# Session info +```{r session info} + +sessionInfo() + +```