[33bf40]: / myeloid / k_bet_publish.ipynb

Download this file

158 lines (157 with data), 5.2 kB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "library(kBET)\n",
    "library(Matrix)\n",
    "library(useful)\n",
    "library(reshape)\n",
    "library(dplyr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Import UMAP coordinates derived from Harmony-adjusted PCs and run kBET for each cluster\n",
    "\n",
    "setwd(\"/home/ngr18/covid/k_bet\")\n",
    "\n",
    "umap<-read.csv('umap_coords_harmony.csv', row.names = 1)\n",
    "\n",
    "clusters<-read.csv('clustering_harmony.csv', row.names = 1)\n",
    "\n",
    "kBET.umi.counts <- list()\n",
    "for (cluster in unique(clusters$initial_clustering)){\n",
    "    keep<-rownames(clusters[clusters$initial_clustering == cluster,])\n",
    "    kBET.umi.counts[[cluster]] <- kBET(df = umap[rownames(umap) %in% keep,], \n",
    "                                     batch = clusters[rownames(clusters) %in% keep,]$sample_id,\n",
    "                                     plot = FALSE)\n",
    "}\n",
    "\n",
    "df <- data.frame(score = c(0,0,0))\n",
    "for (cluster in unique(clusters$initial_clustering)){\n",
    "    label<-paste0(cluster, '_score')\n",
    "    df$label<-kBET.umi.counts$cluster[['summary']][2:4,2]\n",
    "}\n",
    "\n",
    "saveRDS(kBET.umi.counts, \"kbet_result_harmony.RDS\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Import UMAP coordinates without any batch correction and run kBET for each cluster\n",
    "\n",
    "setwd(\"/home/ngr18/covid/k_bet\")\n",
    "\n",
    "umap<-read.csv('umap_coords_harmony.csv', row.names = 1)\n",
    "\n",
    "clusters<-read.csv('clustering_harmony.csv', row.names = 1)\n",
    "\n",
    "kBET.umi.counts <- list()\n",
    "for (cluster in unique(clusters$initial_clustering)){\n",
    "    keep<-rownames(clusters[clusters$initial_clustering == cluster,])\n",
    "    kBET.umi.counts[[cluster]] <- kBET(df = umap[rownames(umap) %in% keep,], \n",
    "                                     batch = clusters[rownames(clusters) %in% keep,]$sample_id,\n",
    "                                     plot = FALSE)\n",
    "}\n",
    "\n",
    "df <- data.frame(score = c(0,0,0))\n",
    "for (cluster in unique(clusters$initial_clustering)){\n",
    "    label<-paste0(cluster, '_score')\n",
    "    df$label<-kBET.umi.counts$cluster[['summary']][2:4,2]\n",
    "}\n",
    "\n",
    "saveRDS(kBET.umi.counts, \"kbet_result_harmony.RDS\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Above steps run on cluster, so reimport results\n",
    "\n",
    "harmony<-readRDS(\"kbet_result_harmony.RDS\")\n",
    "no_harmony<-readRDS(\"kbet_result_no_harmony.RDS\")\n",
    "\n",
    "#remove empty result\n",
    "cluster_names<-unique(names(harmony))\n",
    "cluster_names<-cluster_names[-18]\n",
    "\n",
    "#combine kBet output in to single dataframe (harmony)\n",
    "harmony_result<-data.frame(scores = c(0,0,0))\n",
    "for (i in 1:length(cluster_names)){\n",
    "    temp<-harmony[[cluster_names[i]]][['summary']][2:4,2]\n",
    "   \n",
    "    harmony_result<-cbind(harmony_result, temp)\n",
    "    colnames(harmony_result)[i+1]<-cluster_names[i]\n",
    "}\n",
    "\n",
    "harmony_result$scores<-NULL\n",
    "a<-melt(harmony_result)\n",
    "a$treatment<-\"harmony\"\n",
    "\n",
    "#combine kBet output in to single dataframe (no harmony)\n",
    "\n",
    "no_harmony_result<-data.frame(scores = c(0,0,0))\n",
    "for (i in 1:length(cluster_names)){\n",
    "    temp<-no_harmony[[cluster_names[i]]][['summary']][2:4,2]\n",
    "   \n",
    "    no_harmony_result<-cbind(no_harmony_result, temp)\n",
    "    colnames(no_harmony_result)[i+1]<-cluster_names[i]\n",
    "}\n",
    "\n",
    "no_harmony_result$scores<-NULL\n",
    "b<-melt(no_harmony_result)\n",
    "b$treatment<-\"no_harmony\"\n",
    "\n",
    "#combine both\n",
    "d<-rbind(a,b)\n",
    "\n",
    "#plot\n",
    "options(repr.plot.width=7, repr.plot.height=5)\n",
    "\n",
    "ggplot(d, aes(variable, value, color = treatment)) +\n",
    "geom_boxplot(varwidth=T) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),\n",
    "panel.background = element_blank(), axis.line = element_line(colour = \"black\"))+ ylim(0, 1)+\n",
    "theme(axis.title.x = element_blank(),\n",
    "  axis.title.y = element_blank())+\n",
    "theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 20)) +\n",
    "theme(axis.text.y = element_text(size = 20))\n",
    "\n",
    "\n",
    "setwd(\"/home/ngr18/covid/figures\")\n",
    "ggsave(\"kbet_plot.pdf\", width = 10, height = 10)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "MGC_R_3.6",
   "language": "R",
   "name": "mgc_r"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}