158 lines (157 with data), 5.2 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"library(kBET)\n",
"library(Matrix)\n",
"library(useful)\n",
"library(reshape)\n",
"library(dplyr)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Import UMAP coordinates derived from Harmony-adjusted PCs and run kBET for each cluster\n",
"\n",
"setwd(\"/home/ngr18/covid/k_bet\")\n",
"\n",
"umap<-read.csv('umap_coords_harmony.csv', row.names = 1)\n",
"\n",
"clusters<-read.csv('clustering_harmony.csv', row.names = 1)\n",
"\n",
"kBET.umi.counts <- list()\n",
"for (cluster in unique(clusters$initial_clustering)){\n",
" keep<-rownames(clusters[clusters$initial_clustering == cluster,])\n",
" kBET.umi.counts[[cluster]] <- kBET(df = umap[rownames(umap) %in% keep,], \n",
" batch = clusters[rownames(clusters) %in% keep,]$sample_id,\n",
" plot = FALSE)\n",
"}\n",
"\n",
"df <- data.frame(score = c(0,0,0))\n",
"for (cluster in unique(clusters$initial_clustering)){\n",
" label<-paste0(cluster, '_score')\n",
" df$label<-kBET.umi.counts$cluster[['summary']][2:4,2]\n",
"}\n",
"\n",
"saveRDS(kBET.umi.counts, \"kbet_result_harmony.RDS\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Import UMAP coordinates without any batch correction and run kBET for each cluster\n",
"\n",
"setwd(\"/home/ngr18/covid/k_bet\")\n",
"\n",
"umap<-read.csv('umap_coords_harmony.csv', row.names = 1)\n",
"\n",
"clusters<-read.csv('clustering_harmony.csv', row.names = 1)\n",
"\n",
"kBET.umi.counts <- list()\n",
"for (cluster in unique(clusters$initial_clustering)){\n",
" keep<-rownames(clusters[clusters$initial_clustering == cluster,])\n",
" kBET.umi.counts[[cluster]] <- kBET(df = umap[rownames(umap) %in% keep,], \n",
" batch = clusters[rownames(clusters) %in% keep,]$sample_id,\n",
" plot = FALSE)\n",
"}\n",
"\n",
"df <- data.frame(score = c(0,0,0))\n",
"for (cluster in unique(clusters$initial_clustering)){\n",
" label<-paste0(cluster, '_score')\n",
" df$label<-kBET.umi.counts$cluster[['summary']][2:4,2]\n",
"}\n",
"\n",
"saveRDS(kBET.umi.counts, \"kbet_result_harmony.RDS\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Above steps run on cluster, so reimport results\n",
"\n",
"harmony<-readRDS(\"kbet_result_harmony.RDS\")\n",
"no_harmony<-readRDS(\"kbet_result_no_harmony.RDS\")\n",
"\n",
"#remove empty result\n",
"cluster_names<-unique(names(harmony))\n",
"cluster_names<-cluster_names[-18]\n",
"\n",
"#combine kBet output in to single dataframe (harmony)\n",
"harmony_result<-data.frame(scores = c(0,0,0))\n",
"for (i in 1:length(cluster_names)){\n",
" temp<-harmony[[cluster_names[i]]][['summary']][2:4,2]\n",
" \n",
" harmony_result<-cbind(harmony_result, temp)\n",
" colnames(harmony_result)[i+1]<-cluster_names[i]\n",
"}\n",
"\n",
"harmony_result$scores<-NULL\n",
"a<-melt(harmony_result)\n",
"a$treatment<-\"harmony\"\n",
"\n",
"#combine kBet output in to single dataframe (no harmony)\n",
"\n",
"no_harmony_result<-data.frame(scores = c(0,0,0))\n",
"for (i in 1:length(cluster_names)){\n",
" temp<-no_harmony[[cluster_names[i]]][['summary']][2:4,2]\n",
" \n",
" no_harmony_result<-cbind(no_harmony_result, temp)\n",
" colnames(no_harmony_result)[i+1]<-cluster_names[i]\n",
"}\n",
"\n",
"no_harmony_result$scores<-NULL\n",
"b<-melt(no_harmony_result)\n",
"b$treatment<-\"no_harmony\"\n",
"\n",
"#combine both\n",
"d<-rbind(a,b)\n",
"\n",
"#plot\n",
"options(repr.plot.width=7, repr.plot.height=5)\n",
"\n",
"ggplot(d, aes(variable, value, color = treatment)) +\n",
"geom_boxplot(varwidth=T) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),\n",
"panel.background = element_blank(), axis.line = element_line(colour = \"black\"))+ ylim(0, 1)+\n",
"theme(axis.title.x = element_blank(),\n",
" axis.title.y = element_blank())+\n",
"theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 20)) +\n",
"theme(axis.text.y = element_text(size = 20))\n",
"\n",
"\n",
"setwd(\"/home/ngr18/covid/figures\")\n",
"ggsave(\"kbet_plot.pdf\", width = 10, height = 10)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "MGC_R_3.6",
"language": "R",
"name": "mgc_r"
},
"language_info": {
"codemirror_mode": "r",
"file_extension": ".r",
"mimetype": "text/x-r-source",
"name": "R",
"pygments_lexer": "r",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}