Switch to unified view

a b/myeloid/k_bet_publish.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": null,
6
   "metadata": {},
7
   "outputs": [],
8
   "source": [
9
    "library(kBET)\n",
10
    "library(Matrix)\n",
11
    "library(useful)\n",
12
    "library(reshape)\n",
13
    "library(dplyr)"
14
   ]
15
  },
16
  {
17
   "cell_type": "code",
18
   "execution_count": null,
19
   "metadata": {},
20
   "outputs": [],
21
   "source": [
22
    "#Import UMAP coordinates derived from Harmony-adjusted PCs and run kBET for each cluster\n",
23
    "\n",
24
    "setwd(\"/home/ngr18/covid/k_bet\")\n",
25
    "\n",
26
    "umap<-read.csv('umap_coords_harmony.csv', row.names = 1)\n",
27
    "\n",
28
    "clusters<-read.csv('clustering_harmony.csv', row.names = 1)\n",
29
    "\n",
30
    "kBET.umi.counts <- list()\n",
31
    "for (cluster in unique(clusters$initial_clustering)){\n",
32
    "    keep<-rownames(clusters[clusters$initial_clustering == cluster,])\n",
33
    "    kBET.umi.counts[[cluster]] <- kBET(df = umap[rownames(umap) %in% keep,], \n",
34
    "                                     batch = clusters[rownames(clusters) %in% keep,]$sample_id,\n",
35
    "                                     plot = FALSE)\n",
36
    "}\n",
37
    "\n",
38
    "df <- data.frame(score = c(0,0,0))\n",
39
    "for (cluster in unique(clusters$initial_clustering)){\n",
40
    "    label<-paste0(cluster, '_score')\n",
41
    "    df$label<-kBET.umi.counts$cluster[['summary']][2:4,2]\n",
42
    "}\n",
43
    "\n",
44
    "saveRDS(kBET.umi.counts, \"kbet_result_harmony.RDS\")"
45
   ]
46
  },
47
  {
48
   "cell_type": "code",
49
   "execution_count": null,
50
   "metadata": {},
51
   "outputs": [],
52
   "source": [
53
    "#Import UMAP coordinates without any batch correction and run kBET for each cluster\n",
54
    "\n",
55
    "setwd(\"/home/ngr18/covid/k_bet\")\n",
56
    "\n",
57
    "umap<-read.csv('umap_coords_harmony.csv', row.names = 1)\n",
58
    "\n",
59
    "clusters<-read.csv('clustering_harmony.csv', row.names = 1)\n",
60
    "\n",
61
    "kBET.umi.counts <- list()\n",
62
    "for (cluster in unique(clusters$initial_clustering)){\n",
63
    "    keep<-rownames(clusters[clusters$initial_clustering == cluster,])\n",
64
    "    kBET.umi.counts[[cluster]] <- kBET(df = umap[rownames(umap) %in% keep,], \n",
65
    "                                     batch = clusters[rownames(clusters) %in% keep,]$sample_id,\n",
66
    "                                     plot = FALSE)\n",
67
    "}\n",
68
    "\n",
69
    "df <- data.frame(score = c(0,0,0))\n",
70
    "for (cluster in unique(clusters$initial_clustering)){\n",
71
    "    label<-paste0(cluster, '_score')\n",
72
    "    df$label<-kBET.umi.counts$cluster[['summary']][2:4,2]\n",
73
    "}\n",
74
    "\n",
75
    "saveRDS(kBET.umi.counts, \"kbet_result_harmony.RDS\")"
76
   ]
77
  },
78
  {
79
   "cell_type": "code",
80
   "execution_count": null,
81
   "metadata": {},
82
   "outputs": [],
83
   "source": [
84
    "#Above steps run on cluster, so reimport results\n",
85
    "\n",
86
    "harmony<-readRDS(\"kbet_result_harmony.RDS\")\n",
87
    "no_harmony<-readRDS(\"kbet_result_no_harmony.RDS\")\n",
88
    "\n",
89
    "#remove empty result\n",
90
    "cluster_names<-unique(names(harmony))\n",
91
    "cluster_names<-cluster_names[-18]\n",
92
    "\n",
93
    "#combine kBet output in to single dataframe (harmony)\n",
94
    "harmony_result<-data.frame(scores = c(0,0,0))\n",
95
    "for (i in 1:length(cluster_names)){\n",
96
    "    temp<-harmony[[cluster_names[i]]][['summary']][2:4,2]\n",
97
    "   \n",
98
    "    harmony_result<-cbind(harmony_result, temp)\n",
99
    "    colnames(harmony_result)[i+1]<-cluster_names[i]\n",
100
    "}\n",
101
    "\n",
102
    "harmony_result$scores<-NULL\n",
103
    "a<-melt(harmony_result)\n",
104
    "a$treatment<-\"harmony\"\n",
105
    "\n",
106
    "#combine kBet output in to single dataframe (no harmony)\n",
107
    "\n",
108
    "no_harmony_result<-data.frame(scores = c(0,0,0))\n",
109
    "for (i in 1:length(cluster_names)){\n",
110
    "    temp<-no_harmony[[cluster_names[i]]][['summary']][2:4,2]\n",
111
    "   \n",
112
    "    no_harmony_result<-cbind(no_harmony_result, temp)\n",
113
    "    colnames(no_harmony_result)[i+1]<-cluster_names[i]\n",
114
    "}\n",
115
    "\n",
116
    "no_harmony_result$scores<-NULL\n",
117
    "b<-melt(no_harmony_result)\n",
118
    "b$treatment<-\"no_harmony\"\n",
119
    "\n",
120
    "#combine both\n",
121
    "d<-rbind(a,b)\n",
122
    "\n",
123
    "#plot\n",
124
    "options(repr.plot.width=7, repr.plot.height=5)\n",
125
    "\n",
126
    "ggplot(d, aes(variable, value, color = treatment)) +\n",
127
    "geom_boxplot(varwidth=T) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),\n",
128
    "panel.background = element_blank(), axis.line = element_line(colour = \"black\"))+ ylim(0, 1)+\n",
129
    "theme(axis.title.x = element_blank(),\n",
130
    "  axis.title.y = element_blank())+\n",
131
    "theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 20)) +\n",
132
    "theme(axis.text.y = element_text(size = 20))\n",
133
    "\n",
134
    "\n",
135
    "setwd(\"/home/ngr18/covid/figures\")\n",
136
    "ggsave(\"kbet_plot.pdf\", width = 10, height = 10)"
137
   ]
138
  }
139
 ],
140
 "metadata": {
141
  "kernelspec": {
142
   "display_name": "MGC_R_3.6",
143
   "language": "R",
144
   "name": "mgc_r"
145
  },
146
  "language_info": {
147
   "codemirror_mode": "r",
148
   "file_extension": ".r",
149
   "mimetype": "text/x-r-source",
150
   "name": "R",
151
   "pygments_lexer": "r",
152
   "version": "3.6.1"
153
  }
154
 },
155
 "nbformat": 4,
156
 "nbformat_minor": 4
157
}