Switch to unified view

a b/code_final/8_CTCL_QC.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "raw",
5
   "id": "266252c3-9b47-4127-a80d-8c99f1770d03",
6
   "metadata": {
7
    "tags": []
8
   },
9
   "source": [
10
    "Author : Aya Balbaa\n",
11
    "\n",
12
    "email: ab72@sanger.ac.uk\n"
13
   ]
14
  },
15
  {
16
   "cell_type": "code",
17
   "execution_count": null,
18
   "id": "60515821-6a45-405f-bfe0-5d26d788218d",
19
   "metadata": {},
20
   "outputs": [],
21
   "source": [
22
    "import numpy as np\n",
23
    "import pandas as pd\n",
24
    "import scanpy as sc\n",
25
    "import scrublet as scr\n",
26
    "import sys\n",
27
    "#import bbknn\n",
28
    "from statsmodels import robust\n",
29
    "import matplotlib.pyplot as plt\n",
30
    "import os.path\n",
31
    "import anndata\n",
32
    "#import harmonypy as hm\n",
33
    "\n",
34
    "sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)\n",
35
    "#sc.settings.set_figure_params(dpi=80, color_map='viridis')\n",
36
    "#sc.logging.print_versions()"
37
   ]
38
  },
39
  {
40
   "cell_type": "code",
41
   "execution_count": null,
42
   "id": "2ace0233-3927-4433-81b9-3e52c45b484c",
43
   "metadata": {},
44
   "outputs": [],
45
   "source": []
46
  },
47
  {
48
   "cell_type": "code",
49
   "execution_count": 11,
50
   "id": "c46858c6-ef81-4d1e-aff3-7b63782e93bb",
51
   "metadata": {
52
    "tags": []
53
   },
54
   "outputs": [],
55
   "source": [
56
    "#path1=\"/lustre/scratch127/cellgen/cellgeni/tickets/tic-2769/work/Sanger/gex/\""
57
   ]
58
  },
59
  {
60
   "cell_type": "code",
61
   "execution_count": 12,
62
   "id": "66e282b5-881d-44df-8d48-f7889af3a13a",
63
   "metadata": {
64
    "tags": []
65
   },
66
   "outputs": [],
67
   "source": [
68
    "#path2= \"/output/Gene/filtered\""
69
   ]
70
  },
71
  {
72
   "cell_type": "code",
73
   "execution_count": 2,
74
   "id": "0a00f4b2-c034-4c7c-84ab-c41784d4d5f0",
75
   "metadata": {
76
    "tags": []
77
   },
78
   "outputs": [],
79
   "source": [
80
    "path1=\"/lustre/scratch127/cellgen/cellgeni/tickets/tic-2769/work/Sanger/gex/cellbender/\""
81
   ]
82
  },
83
  {
84
   "cell_type": "code",
85
   "execution_count": 3,
86
   "id": "19d9b4bc-4303-4e47-b41c-07cb693f68dc",
87
   "metadata": {
88
    "tags": []
89
   },
90
   "outputs": [],
91
   "source": [
92
    "path2= \"/cellbender_out/\""
93
   ]
94
  },
95
  {
96
   "cell_type": "code",
97
   "execution_count": 4,
98
   "id": "d1fd37a7-f39f-4054-a332-1ee3081f1c3f",
99
   "metadata": {
100
    "tags": []
101
   },
102
   "outputs": [],
103
   "source": [
104
    "file_path=('/lustre/scratch126/cellgen/team298/ab72/CTCL/skin_info.csv')"
105
   ]
106
  },
107
  {
108
   "cell_type": "code",
109
   "execution_count": 5,
110
   "id": "c6d69ac6-7775-4cf5-91fa-619818762adf",
111
   "metadata": {
112
    "tags": []
113
   },
114
   "outputs": [],
115
   "source": [
116
    "samples = pd.read_csv(file_path)"
117
   ]
118
  },
119
  {
120
   "cell_type": "code",
121
   "execution_count": 6,
122
   "id": "0755c9fb-85e5-4f8d-9db4-599f44725e54",
123
   "metadata": {
124
    "tags": []
125
   },
126
   "outputs": [
127
    {
128
     "data": {
129
      "text/html": [
130
       "<div>\n",
131
       "<style scoped>\n",
132
       "    .dataframe tbody tr th:only-of-type {\n",
133
       "        vertical-align: middle;\n",
134
       "    }\n",
135
       "\n",
136
       "    .dataframe tbody tr th {\n",
137
       "        vertical-align: top;\n",
138
       "    }\n",
139
       "\n",
140
       "    .dataframe thead th {\n",
141
       "        text-align: right;\n",
142
       "    }\n",
143
       "</style>\n",
144
       "<table border=\"1\" class=\"dataframe\">\n",
145
       "  <thead>\n",
146
       "    <tr style=\"text-align: right;\">\n",
147
       "      <th></th>\n",
148
       "      <th>irods/farm</th>\n",
149
       "      <th>Sample_type</th>\n",
150
       "      <th>Donor</th>\n",
151
       "      <th>Sample_id</th>\n",
152
       "      <th>Tissue</th>\n",
153
       "      <th>Site</th>\n",
154
       "      <th>Sex</th>\n",
155
       "    </tr>\n",
156
       "  </thead>\n",
157
       "  <tbody>\n",
158
       "    <tr>\n",
159
       "      <th>0</th>\n",
160
       "      <td>4820STDY7388991</td>\n",
161
       "      <td>healthy_skin</td>\n",
162
       "      <td>S1</td>\n",
163
       "      <td>4820STDY7388991</td>\n",
164
       "      <td>Dermis</td>\n",
165
       "      <td>non_lesion</td>\n",
166
       "      <td>Female</td>\n",
167
       "    </tr>\n",
168
       "    <tr>\n",
169
       "      <th>1</th>\n",
170
       "      <td>4820STDY7388992</td>\n",
171
       "      <td>healthy_skin</td>\n",
172
       "      <td>S1</td>\n",
173
       "      <td>4820STDY7388992</td>\n",
174
       "      <td>Dermis</td>\n",
175
       "      <td>non_lesion</td>\n",
176
       "      <td>Female</td>\n",
177
       "    </tr>\n",
178
       "    <tr>\n",
179
       "      <th>2</th>\n",
180
       "      <td>4820STDY7388993</td>\n",
181
       "      <td>healthy_skin</td>\n",
182
       "      <td>S1</td>\n",
183
       "      <td>4820STDY7388993</td>\n",
184
       "      <td>Dermis</td>\n",
185
       "      <td>non_lesion</td>\n",
186
       "      <td>Female</td>\n",
187
       "    </tr>\n",
188
       "    <tr>\n",
189
       "      <th>3</th>\n",
190
       "      <td>4820STDY7388994</td>\n",
191
       "      <td>healthy_skin</td>\n",
192
       "      <td>S1</td>\n",
193
       "      <td>4820STDY7388994</td>\n",
194
       "      <td>Dermis</td>\n",
195
       "      <td>non_lesion</td>\n",
196
       "      <td>Female</td>\n",
197
       "    </tr>\n",
198
       "    <tr>\n",
199
       "      <th>4</th>\n",
200
       "      <td>4820STDY7388995</td>\n",
201
       "      <td>healthy_skin</td>\n",
202
       "      <td>S1</td>\n",
203
       "      <td>4820STDY7388995</td>\n",
204
       "      <td>Epidermis</td>\n",
205
       "      <td>non_lesion</td>\n",
206
       "      <td>Female</td>\n",
207
       "    </tr>\n",
208
       "    <tr>\n",
209
       "      <th>...</th>\n",
210
       "      <td>...</td>\n",
211
       "      <td>...</td>\n",
212
       "      <td>...</td>\n",
213
       "      <td>...</td>\n",
214
       "      <td>...</td>\n",
215
       "      <td>...</td>\n",
216
       "      <td>...</td>\n",
217
       "    </tr>\n",
218
       "    <tr>\n",
219
       "      <th>131</th>\n",
220
       "      <td>CTCL3_GEX_4</td>\n",
221
       "      <td>CTCL</td>\n",
222
       "      <td>CTCL3</td>\n",
223
       "      <td>CTCL3_GEX_4</td>\n",
224
       "      <td>Epidermis</td>\n",
225
       "      <td>lesion</td>\n",
226
       "      <td>Female</td>\n",
227
       "    </tr>\n",
228
       "    <tr>\n",
229
       "      <th>132</th>\n",
230
       "      <td>CTCL4_GEX_1</td>\n",
231
       "      <td>CTCL</td>\n",
232
       "      <td>CTCL4</td>\n",
233
       "      <td>CTCL4_GEX_1</td>\n",
234
       "      <td>Dermis</td>\n",
235
       "      <td>lesion</td>\n",
236
       "      <td>Male</td>\n",
237
       "    </tr>\n",
238
       "    <tr>\n",
239
       "      <th>133</th>\n",
240
       "      <td>CTCL4_GEX_2</td>\n",
241
       "      <td>CTCL</td>\n",
242
       "      <td>CTCL4</td>\n",
243
       "      <td>CTCL4_GEX_2</td>\n",
244
       "      <td>Dermis</td>\n",
245
       "      <td>lesion</td>\n",
246
       "      <td>Male</td>\n",
247
       "    </tr>\n",
248
       "    <tr>\n",
249
       "      <th>134</th>\n",
250
       "      <td>CTCL4_GEX_3</td>\n",
251
       "      <td>CTCL</td>\n",
252
       "      <td>CTCL4</td>\n",
253
       "      <td>CTCL4_GEX_3</td>\n",
254
       "      <td>Epidermis</td>\n",
255
       "      <td>lesion</td>\n",
256
       "      <td>Male</td>\n",
257
       "    </tr>\n",
258
       "    <tr>\n",
259
       "      <th>135</th>\n",
260
       "      <td>CTCL4_GEX_4</td>\n",
261
       "      <td>CTCL</td>\n",
262
       "      <td>CTCL4</td>\n",
263
       "      <td>CTCL4_GEX_4</td>\n",
264
       "      <td>Epidermis</td>\n",
265
       "      <td>lesion</td>\n",
266
       "      <td>Male</td>\n",
267
       "    </tr>\n",
268
       "  </tbody>\n",
269
       "</table>\n",
270
       "<p>136 rows × 7 columns</p>\n",
271
       "</div>"
272
      ],
273
      "text/plain": [
274
       "          irods/farm   Sample_type  Donor        Sample_id     Tissue  \\\n",
275
       "0    4820STDY7388991  healthy_skin     S1  4820STDY7388991     Dermis   \n",
276
       "1    4820STDY7388992  healthy_skin     S1  4820STDY7388992     Dermis   \n",
277
       "2    4820STDY7388993  healthy_skin     S1  4820STDY7388993     Dermis   \n",
278
       "3    4820STDY7388994  healthy_skin     S1  4820STDY7388994     Dermis   \n",
279
       "4    4820STDY7388995  healthy_skin     S1  4820STDY7388995  Epidermis   \n",
280
       "..               ...           ...    ...              ...        ...   \n",
281
       "131      CTCL3_GEX_4          CTCL  CTCL3      CTCL3_GEX_4  Epidermis   \n",
282
       "132      CTCL4_GEX_1          CTCL  CTCL4      CTCL4_GEX_1     Dermis   \n",
283
       "133      CTCL4_GEX_2          CTCL  CTCL4      CTCL4_GEX_2     Dermis   \n",
284
       "134      CTCL4_GEX_3          CTCL  CTCL4      CTCL4_GEX_3  Epidermis   \n",
285
       "135      CTCL4_GEX_4          CTCL  CTCL4      CTCL4_GEX_4  Epidermis   \n",
286
       "\n",
287
       "           Site     Sex  \n",
288
       "0    non_lesion  Female  \n",
289
       "1    non_lesion  Female  \n",
290
       "2    non_lesion  Female  \n",
291
       "3    non_lesion  Female  \n",
292
       "4    non_lesion  Female  \n",
293
       "..          ...     ...  \n",
294
       "131      lesion  Female  \n",
295
       "132      lesion    Male  \n",
296
       "133      lesion    Male  \n",
297
       "134      lesion    Male  \n",
298
       "135      lesion    Male  \n",
299
       "\n",
300
       "[136 rows x 7 columns]"
301
      ]
302
     },
303
     "execution_count": 6,
304
     "metadata": {},
305
     "output_type": "execute_result"
306
    }
307
   ],
308
   "source": [
309
    "samples"
310
   ]
311
  },
312
  {
313
   "cell_type": "code",
314
   "execution_count": 34,
315
   "id": "fcce3a97-7fec-44fb-a9ec-1a852896ec20",
316
   "metadata": {
317
    "tags": []
318
   },
319
   "outputs": [],
320
   "source": [
321
    "#import os\n",
322
    "\n",
323
    "# Directory path\n",
324
    "#directory_path = '/lustre/scratch127/cellgen/cellgeni/tickets/tic-2769/work/Sanger/gex/'\n",
325
    "\n",
326
    "# Get list of all subdirectories\n",
327
    "#sub = [name for name in os.listdir(directory_path) if os.path.isdir(os.path.join(directory_path, name))]\n",
328
    "\n",
329
    "#sub=[item for item in sub if item not in ['logs', 'cellbender']]\n",
330
    "\n"
331
   ]
332
  },
333
  {
334
   "cell_type": "code",
335
   "execution_count": 7,
336
   "id": "217c97f3-a00c-455d-a71e-9e7dd82e4e3e",
337
   "metadata": {
338
    "tags": []
339
   },
340
   "outputs": [],
341
   "source": [
342
    "import os\n",
343
    "\n",
344
    "# Directory path\n",
345
    "# Reuse path1 (set in an earlier cell to this same directory) so that the directory\n",
    "# listed here and the prefix used to build per-sample paths cannot drift apart.\n",
    "directory_path = path1\n",
346
    "\n",
347
    "# Get list of all subdirectories\n",
348
    "# Names (not full paths) of the entries in directory_path that are directories\n",
    "sub = [\n",
    "    entry\n",
    "    for entry in os.listdir(directory_path)\n",
    "    if os.path.isdir(os.path.join(directory_path, entry))\n",
    "]\n",
349
    "\n"
350
   ]
351
  },
352
  {
353
   "cell_type": "code",
354
   "execution_count": 8,
355
   "id": "e45c4fde-7fc1-45ae-ab1e-dc992dc62fa1",
356
   "metadata": {
357
    "tags": []
358
   },
359
   "outputs": [],
360
   "source": [
361
    "# Keep only the rows of `samples` whose 'irods/farm' value is one of the\n",
    "# sub-directory names collected in `sub`.\n",
    "# .copy() makes sample_filt an independent DataFrame, so assigning new columns to it\n",
    "# in later cells no longer triggers pandas' SettingWithCopyWarning.\n",
    "sample_filt = samples.loc[samples['irods/farm'].isin(sub)].copy()\n"
362
   ]
363
  },
364
  {
365
   "cell_type": "code",
366
   "execution_count": 9,
367
   "id": "a220b9fb-be70-45e2-bd30-17f75138d986",
368
   "metadata": {
369
    "tags": []
370
   },
371
   "outputs": [],
372
   "source": [
373
    "# 'irods/farm' values of the retained samples, as a plain Python list\n",
    "column_list = sample_filt.loc[:, 'irods/farm'].tolist()"
374
   ]
375
  },
376
  {
377
   "cell_type": "code",
378
   "execution_count": 10,
379
   "id": "2ada8a67-f82f-4ecb-8386-a6c6c4103d3f",
380
   "metadata": {
381
    "tags": []
382
   },
383
   "outputs": [],
384
   "source": [
385
    "# One path per retained sample, built as path1 + <sample name> + path2\n",
    "path = [path1 + sample_name + path2 for sample_name in column_list]"
386
   ]
387
  },
388
  {
389
   "cell_type": "code",
390
   "execution_count": 11,
391
   "id": "41131d00-0054-454c-9d99-50a19092e1c2",
392
   "metadata": {
393
    "tags": []
394
   },
395
   "outputs": [
396
    {
397
     "name": "stderr",
398
     "output_type": "stream",
399
     "text": [
400
      "/tmp/ipykernel_524030/2675726948.py:1: SettingWithCopyWarning: \n",
401
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
402
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
403
      "\n",
404
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
405
      "  sample_filt['path']= path\n"
406
     ]
407
    },
408
    {
409
     "data": {
410
      "text/html": [
411
       "<div>\n",
412
       "<style scoped>\n",
413
       "    .dataframe tbody tr th:only-of-type {\n",
414
       "        vertical-align: middle;\n",
415
       "    }\n",
416
       "\n",
417
       "    .dataframe tbody tr th {\n",
418
       "        vertical-align: top;\n",
419
       "    }\n",
420
       "\n",
421
       "    .dataframe thead th {\n",
422
       "        text-align: right;\n",
423
       "    }\n",
424
       "</style>\n",
425
       "<table border=\"1\" class=\"dataframe\">\n",
426
       "  <thead>\n",
427
       "    <tr style=\"text-align: right;\">\n",
428
       "      <th></th>\n",
429
       "      <th>irods/farm</th>\n",
430
       "      <th>Sample_type</th>\n",
431
       "      <th>Donor</th>\n",
432
       "      <th>Sample_id</th>\n",
433
       "      <th>Tissue</th>\n",
434
       "      <th>Site</th>\n",
435
       "      <th>Sex</th>\n",
436
       "      <th>path</th>\n",
437
       "    </tr>\n",
438
       "  </thead>\n",
439
       "  <tbody>\n",
440
       "    <tr>\n",
441
       "      <th>96</th>\n",
442
       "      <td>WSSS_SKN8090612</td>\n",
443
       "      <td>CTCL</td>\n",
444
       "      <td>CTCL1</td>\n",
445
       "      <td>CTCL1_GEX_1</td>\n",
446
       "      <td>Epidermis</td>\n",
447
       "      <td>lesion</td>\n",
448
       "      <td>Female</td>\n",
449
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
450
       "    </tr>\n",
451
       "    <tr>\n",
452
       "      <th>97</th>\n",
453
       "      <td>WSSS_SKN8090613</td>\n",
454
       "      <td>CTCL</td>\n",
455
       "      <td>CTCL1</td>\n",
456
       "      <td>CTCL1_GEX_2</td>\n",
457
       "      <td>Epidermis</td>\n",
458
       "      <td>lesion</td>\n",
459
       "      <td>Female</td>\n",
460
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
461
       "    </tr>\n",
462
       "    <tr>\n",
463
       "      <th>98</th>\n",
464
       "      <td>WSSS_SKN8090614</td>\n",
465
       "      <td>CTCL</td>\n",
466
       "      <td>CTCL1</td>\n",
467
       "      <td>CTCL1_GEX_3</td>\n",
468
       "      <td>Dermis</td>\n",
469
       "      <td>lesion</td>\n",
470
       "      <td>Female</td>\n",
471
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
472
       "    </tr>\n",
473
       "    <tr>\n",
474
       "      <th>99</th>\n",
475
       "      <td>WSSS_SKN8090615</td>\n",
476
       "      <td>CTCL</td>\n",
477
       "      <td>CTCL1</td>\n",
478
       "      <td>CTCL1_GEX_4</td>\n",
479
       "      <td>Dermis</td>\n",
480
       "      <td>lesion</td>\n",
481
       "      <td>Female</td>\n",
482
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
483
       "    </tr>\n",
484
       "    <tr>\n",
485
       "      <th>100</th>\n",
486
       "      <td>WSSS_SKN10827890</td>\n",
487
       "      <td>CTCL</td>\n",
488
       "      <td>CTCL5</td>\n",
489
       "      <td>CTCL5_Derm_45N_G</td>\n",
490
       "      <td>Dermis</td>\n",
491
       "      <td>lesion</td>\n",
492
       "      <td>Male</td>\n",
493
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
494
       "    </tr>\n",
495
       "    <tr>\n",
496
       "      <th>101</th>\n",
497
       "      <td>WSSS_SKN10827891</td>\n",
498
       "      <td>CTCL</td>\n",
499
       "      <td>CTCL5</td>\n",
500
       "      <td>CTCL5_Derm_45P_8N_G</td>\n",
501
       "      <td>Dermis</td>\n",
502
       "      <td>lesion</td>\n",
503
       "      <td>Male</td>\n",
504
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
505
       "    </tr>\n",
506
       "    <tr>\n",
507
       "      <th>102</th>\n",
508
       "      <td>WSSS_SKN10827892</td>\n",
509
       "      <td>CTCL</td>\n",
510
       "      <td>CTCL5</td>\n",
511
       "      <td>CTCL5_Derm_45P_8Pr_G</td>\n",
512
       "      <td>Dermis</td>\n",
513
       "      <td>lesion</td>\n",
514
       "      <td>Male</td>\n",
515
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
516
       "    </tr>\n",
517
       "    <tr>\n",
518
       "      <th>103</th>\n",
519
       "      <td>WSSS_SKN10827893</td>\n",
520
       "      <td>CTCL</td>\n",
521
       "      <td>CTCL5</td>\n",
522
       "      <td>CTCL5_Epi_45N_G</td>\n",
523
       "      <td>Epidermis</td>\n",
524
       "      <td>lesion</td>\n",
525
       "      <td>Male</td>\n",
526
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
527
       "    </tr>\n",
528
       "    <tr>\n",
529
       "      <th>104</th>\n",
530
       "      <td>WSSS_SKN10827894</td>\n",
531
       "      <td>CTCL</td>\n",
532
       "      <td>CTCL5</td>\n",
533
       "      <td>CTCL5_Epi_45P_8N_G</td>\n",
534
       "      <td>Epidermis</td>\n",
535
       "      <td>lesion</td>\n",
536
       "      <td>Male</td>\n",
537
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
538
       "    </tr>\n",
539
       "    <tr>\n",
540
       "      <th>105</th>\n",
541
       "      <td>WSSS_SKN10827895</td>\n",
542
       "      <td>CTCL</td>\n",
543
       "      <td>CTCL5</td>\n",
544
       "      <td>CTCL5_Epi_45P_8Pr_G</td>\n",
545
       "      <td>Epidermis</td>\n",
546
       "      <td>lesion</td>\n",
547
       "      <td>Male</td>\n",
548
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
549
       "    </tr>\n",
550
       "    <tr>\n",
551
       "      <th>106</th>\n",
552
       "      <td>WSSS_SKN10827896</td>\n",
553
       "      <td>CTCL</td>\n",
554
       "      <td>CTCL6</td>\n",
555
       "      <td>CTCL6_Derm45N_G</td>\n",
556
       "      <td>Dermis</td>\n",
557
       "      <td>lesion</td>\n",
558
       "      <td>Male</td>\n",
559
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
560
       "    </tr>\n",
561
       "    <tr>\n",
562
       "      <th>107</th>\n",
563
       "      <td>WSSS_SKN10827897</td>\n",
564
       "      <td>CTCL</td>\n",
565
       "      <td>CTCL6</td>\n",
566
       "      <td>CTCL6_Derm45P_8N_G</td>\n",
567
       "      <td>Dermis</td>\n",
568
       "      <td>lesion</td>\n",
569
       "      <td>Male</td>\n",
570
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
571
       "    </tr>\n",
572
       "    <tr>\n",
573
       "      <th>108</th>\n",
574
       "      <td>WSSS_SKN10827898</td>\n",
575
       "      <td>CTCL</td>\n",
576
       "      <td>CTCL6</td>\n",
577
       "      <td>CTCL6_Derm45P_8Pr_G</td>\n",
578
       "      <td>Dermis</td>\n",
579
       "      <td>lesion</td>\n",
580
       "      <td>Male</td>\n",
581
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
582
       "    </tr>\n",
583
       "    <tr>\n",
584
       "      <th>109</th>\n",
585
       "      <td>WSSS_SKN10827899</td>\n",
586
       "      <td>CTCL</td>\n",
587
       "      <td>CTCL6</td>\n",
588
       "      <td>CTCL6_Epi45N_G</td>\n",
589
       "      <td>Epidermis</td>\n",
590
       "      <td>lesion</td>\n",
591
       "      <td>Male</td>\n",
592
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
593
       "    </tr>\n",
594
       "    <tr>\n",
595
       "      <th>110</th>\n",
596
       "      <td>WSSS_SKN10827900</td>\n",
597
       "      <td>CTCL</td>\n",
598
       "      <td>CTCL6</td>\n",
599
       "      <td>CTCL6_Epi45P_8N_G</td>\n",
600
       "      <td>Epidermis</td>\n",
601
       "      <td>lesion</td>\n",
602
       "      <td>Male</td>\n",
603
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
604
       "    </tr>\n",
605
       "    <tr>\n",
606
       "      <th>111</th>\n",
607
       "      <td>WSSS_SKN10827901</td>\n",
608
       "      <td>CTCL</td>\n",
609
       "      <td>CTCL6</td>\n",
610
       "      <td>CTCL6_Epi45P_8Pr_G</td>\n",
611
       "      <td>Epidermis</td>\n",
612
       "      <td>lesion</td>\n",
613
       "      <td>Male</td>\n",
614
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
615
       "    </tr>\n",
616
       "    <tr>\n",
617
       "      <th>112</th>\n",
618
       "      <td>WSSS_SKN10827902</td>\n",
619
       "      <td>CTCL</td>\n",
620
       "      <td>CTCL7</td>\n",
621
       "      <td>CTCL7_Derm45N_G</td>\n",
622
       "      <td>Dermis</td>\n",
623
       "      <td>lesion</td>\n",
624
       "      <td>Female</td>\n",
625
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
626
       "    </tr>\n",
627
       "    <tr>\n",
628
       "      <th>113</th>\n",
629
       "      <td>WSSS_SKN10827903</td>\n",
630
       "      <td>CTCL</td>\n",
631
       "      <td>CTCL7</td>\n",
632
       "      <td>CTCL7_Derm45P_8N_G</td>\n",
633
       "      <td>Dermis</td>\n",
634
       "      <td>lesion</td>\n",
635
       "      <td>Female</td>\n",
636
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
637
       "    </tr>\n",
638
       "    <tr>\n",
639
       "      <th>114</th>\n",
640
       "      <td>WSSS_SKN10827904</td>\n",
641
       "      <td>CTCL</td>\n",
642
       "      <td>CTCL7</td>\n",
643
       "      <td>CTCL7_Derm45P_8Pr_G</td>\n",
644
       "      <td>Dermis</td>\n",
645
       "      <td>lesion</td>\n",
646
       "      <td>Female</td>\n",
647
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
648
       "    </tr>\n",
649
       "    <tr>\n",
650
       "      <th>115</th>\n",
651
       "      <td>WSSS_SKN10827905</td>\n",
652
       "      <td>CTCL</td>\n",
653
       "      <td>CTCL7</td>\n",
654
       "      <td>CTCL7_Epi45N_G</td>\n",
655
       "      <td>Epidermis</td>\n",
656
       "      <td>lesion</td>\n",
657
       "      <td>Female</td>\n",
658
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
659
       "    </tr>\n",
660
       "    <tr>\n",
661
       "      <th>116</th>\n",
662
       "      <td>WSSS_SKN10827906</td>\n",
663
       "      <td>CTCL</td>\n",
664
       "      <td>CTCL7</td>\n",
665
       "      <td>CTCL7_Epi45P_8N_G</td>\n",
666
       "      <td>Epidermis</td>\n",
667
       "      <td>lesion</td>\n",
668
       "      <td>Female</td>\n",
669
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
670
       "    </tr>\n",
671
       "    <tr>\n",
672
       "      <th>117</th>\n",
673
       "      <td>WSSS_SKN10827907</td>\n",
674
       "      <td>CTCL</td>\n",
675
       "      <td>CTCL7</td>\n",
676
       "      <td>CTCL7_Epi45P_8Pr_G</td>\n",
677
       "      <td>Epidermis</td>\n",
678
       "      <td>lesion</td>\n",
679
       "      <td>Female</td>\n",
680
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
681
       "    </tr>\n",
682
       "    <tr>\n",
683
       "      <th>118</th>\n",
684
       "      <td>WSSS_SKN10827908</td>\n",
685
       "      <td>CTCL</td>\n",
686
       "      <td>CTCL8</td>\n",
687
       "      <td>CTCL8_Derm45N_G</td>\n",
688
       "      <td>Dermis</td>\n",
689
       "      <td>lesion</td>\n",
690
       "      <td>Male</td>\n",
691
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
692
       "    </tr>\n",
693
       "    <tr>\n",
694
       "      <th>119</th>\n",
695
       "      <td>WSSS_SKN10827909</td>\n",
696
       "      <td>CTCL</td>\n",
697
       "      <td>CTCL8</td>\n",
698
       "      <td>CTCL8_Derm45P_8N_G</td>\n",
699
       "      <td>Dermis</td>\n",
700
       "      <td>lesion</td>\n",
701
       "      <td>Male</td>\n",
702
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
703
       "    </tr>\n",
704
       "    <tr>\n",
705
       "      <th>120</th>\n",
706
       "      <td>WSSS_SKN10827910</td>\n",
707
       "      <td>CTCL</td>\n",
708
       "      <td>CTCL8</td>\n",
709
       "      <td>CTCL8_Derm45P_8Pr_G</td>\n",
710
       "      <td>Dermis</td>\n",
711
       "      <td>lesion</td>\n",
712
       "      <td>Male</td>\n",
713
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
714
       "    </tr>\n",
715
       "    <tr>\n",
716
       "      <th>121</th>\n",
717
       "      <td>WSSS_SKN10827911</td>\n",
718
       "      <td>CTCL</td>\n",
719
       "      <td>CTCL8</td>\n",
720
       "      <td>CTCL8_Epi45N_G</td>\n",
721
       "      <td>Epidermis</td>\n",
722
       "      <td>lesion</td>\n",
723
       "      <td>Male</td>\n",
724
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
725
       "    </tr>\n",
726
       "    <tr>\n",
727
       "      <th>122</th>\n",
728
       "      <td>WSSS_SKN10827912</td>\n",
729
       "      <td>CTCL</td>\n",
730
       "      <td>CTCL8</td>\n",
731
       "      <td>CTCL8_Epi45P_POOL_G</td>\n",
732
       "      <td>Epidermis</td>\n",
733
       "      <td>lesion</td>\n",
734
       "      <td>Male</td>\n",
735
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
736
       "    </tr>\n",
737
       "    <tr>\n",
738
       "      <th>123</th>\n",
739
       "      <td>CTCL2_GEX_1</td>\n",
740
       "      <td>CTCL</td>\n",
741
       "      <td>CTCL2</td>\n",
742
       "      <td>CTCL2_GEX_1</td>\n",
743
       "      <td>Dermis</td>\n",
744
       "      <td>lesion</td>\n",
745
       "      <td>Male</td>\n",
746
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
747
       "    </tr>\n",
748
       "    <tr>\n",
749
       "      <th>124</th>\n",
750
       "      <td>CTCL2_GEX_2</td>\n",
751
       "      <td>CTCL</td>\n",
752
       "      <td>CTCL2</td>\n",
753
       "      <td>CTCL2_GEX_2</td>\n",
754
       "      <td>Dermis</td>\n",
755
       "      <td>lesion</td>\n",
756
       "      <td>Male</td>\n",
757
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
758
       "    </tr>\n",
759
       "    <tr>\n",
760
       "      <th>125</th>\n",
761
       "      <td>CTCL2_GEX_3</td>\n",
762
       "      <td>CTCL</td>\n",
763
       "      <td>CTCL2</td>\n",
764
       "      <td>CTCL2_GEX_3</td>\n",
765
       "      <td>Dermis</td>\n",
766
       "      <td>lesion</td>\n",
767
       "      <td>Male</td>\n",
768
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
769
       "    </tr>\n",
770
       "    <tr>\n",
771
       "      <th>126</th>\n",
772
       "      <td>CTCL2_GEX_4</td>\n",
773
       "      <td>CTCL</td>\n",
774
       "      <td>CTCL2</td>\n",
775
       "      <td>CTCL2_GEX_4</td>\n",
776
       "      <td>Epidermis</td>\n",
777
       "      <td>lesion</td>\n",
778
       "      <td>Male</td>\n",
779
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
780
       "    </tr>\n",
781
       "    <tr>\n",
782
       "      <th>127</th>\n",
783
       "      <td>CTCL2_GEX_5</td>\n",
784
       "      <td>CTCL</td>\n",
785
       "      <td>CTCL2</td>\n",
786
       "      <td>CTCL2_GEX_5</td>\n",
787
       "      <td>Epidermis</td>\n",
788
       "      <td>lesion</td>\n",
789
       "      <td>Male</td>\n",
790
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
791
       "    </tr>\n",
792
       "    <tr>\n",
793
       "      <th>128</th>\n",
794
       "      <td>CTCL3_GEX_1</td>\n",
795
       "      <td>CTCL</td>\n",
796
       "      <td>CTCL3</td>\n",
797
       "      <td>CTCL3_GEX_1</td>\n",
798
       "      <td>Dermis</td>\n",
799
       "      <td>lesion</td>\n",
800
       "      <td>Female</td>\n",
801
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
802
       "    </tr>\n",
803
       "    <tr>\n",
804
       "      <th>129</th>\n",
805
       "      <td>CTCL3_GEX_2</td>\n",
806
       "      <td>CTCL</td>\n",
807
       "      <td>CTCL3</td>\n",
808
       "      <td>CTCL3_GEX_2</td>\n",
809
       "      <td>Dermis</td>\n",
810
       "      <td>lesion</td>\n",
811
       "      <td>Female</td>\n",
812
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
813
       "    </tr>\n",
814
       "    <tr>\n",
815
       "      <th>130</th>\n",
816
       "      <td>CTCL3_GEX_3</td>\n",
817
       "      <td>CTCL</td>\n",
818
       "      <td>CTCL3</td>\n",
819
       "      <td>CTCL3_GEX_3</td>\n",
820
       "      <td>Dermis</td>\n",
821
       "      <td>lesion</td>\n",
822
       "      <td>Female</td>\n",
823
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
824
       "    </tr>\n",
825
       "    <tr>\n",
826
       "      <th>131</th>\n",
827
       "      <td>CTCL3_GEX_4</td>\n",
828
       "      <td>CTCL</td>\n",
829
       "      <td>CTCL3</td>\n",
830
       "      <td>CTCL3_GEX_4</td>\n",
831
       "      <td>Epidermis</td>\n",
832
       "      <td>lesion</td>\n",
833
       "      <td>Female</td>\n",
834
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
835
       "    </tr>\n",
836
       "    <tr>\n",
837
       "      <th>132</th>\n",
838
       "      <td>CTCL4_GEX_1</td>\n",
839
       "      <td>CTCL</td>\n",
840
       "      <td>CTCL4</td>\n",
841
       "      <td>CTCL4_GEX_1</td>\n",
842
       "      <td>Dermis</td>\n",
843
       "      <td>lesion</td>\n",
844
       "      <td>Male</td>\n",
845
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
846
       "    </tr>\n",
847
       "    <tr>\n",
848
       "      <th>133</th>\n",
849
       "      <td>CTCL4_GEX_2</td>\n",
850
       "      <td>CTCL</td>\n",
851
       "      <td>CTCL4</td>\n",
852
       "      <td>CTCL4_GEX_2</td>\n",
853
       "      <td>Dermis</td>\n",
854
       "      <td>lesion</td>\n",
855
       "      <td>Male</td>\n",
856
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
857
       "    </tr>\n",
858
       "    <tr>\n",
859
       "      <th>134</th>\n",
860
       "      <td>CTCL4_GEX_3</td>\n",
861
       "      <td>CTCL</td>\n",
862
       "      <td>CTCL4</td>\n",
863
       "      <td>CTCL4_GEX_3</td>\n",
864
       "      <td>Epidermis</td>\n",
865
       "      <td>lesion</td>\n",
866
       "      <td>Male</td>\n",
867
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
868
       "    </tr>\n",
869
       "    <tr>\n",
870
       "      <th>135</th>\n",
871
       "      <td>CTCL4_GEX_4</td>\n",
872
       "      <td>CTCL</td>\n",
873
       "      <td>CTCL4</td>\n",
874
       "      <td>CTCL4_GEX_4</td>\n",
875
       "      <td>Epidermis</td>\n",
876
       "      <td>lesion</td>\n",
877
       "      <td>Male</td>\n",
878
       "      <td>/lustre/scratch127/cellgen/cellgeni/tickets/ti...</td>\n",
879
       "    </tr>\n",
880
       "  </tbody>\n",
881
       "</table>\n",
882
       "</div>"
883
      ],
884
      "text/plain": [
885
       "           irods/farm Sample_type  Donor             Sample_id     Tissue  \\\n",
886
       "96    WSSS_SKN8090612        CTCL  CTCL1           CTCL1_GEX_1  Epidermis   \n",
887
       "97    WSSS_SKN8090613        CTCL  CTCL1           CTCL1_GEX_2  Epidermis   \n",
888
       "98    WSSS_SKN8090614        CTCL  CTCL1           CTCL1_GEX_3     Dermis   \n",
889
       "99    WSSS_SKN8090615        CTCL  CTCL1           CTCL1_GEX_4     Dermis   \n",
890
       "100  WSSS_SKN10827890        CTCL  CTCL5      CTCL5_Derm_45N_G     Dermis   \n",
891
       "101  WSSS_SKN10827891        CTCL  CTCL5   CTCL5_Derm_45P_8N_G     Dermis   \n",
892
       "102  WSSS_SKN10827892        CTCL  CTCL5  CTCL5_Derm_45P_8Pr_G     Dermis   \n",
893
       "103  WSSS_SKN10827893        CTCL  CTCL5       CTCL5_Epi_45N_G  Epidermis   \n",
894
       "104  WSSS_SKN10827894        CTCL  CTCL5    CTCL5_Epi_45P_8N_G  Epidermis   \n",
895
       "105  WSSS_SKN10827895        CTCL  CTCL5   CTCL5_Epi_45P_8Pr_G  Epidermis   \n",
896
       "106  WSSS_SKN10827896        CTCL  CTCL6       CTCL6_Derm45N_G     Dermis   \n",
897
       "107  WSSS_SKN10827897        CTCL  CTCL6    CTCL6_Derm45P_8N_G     Dermis   \n",
898
       "108  WSSS_SKN10827898        CTCL  CTCL6   CTCL6_Derm45P_8Pr_G     Dermis   \n",
899
       "109  WSSS_SKN10827899        CTCL  CTCL6        CTCL6_Epi45N_G  Epidermis   \n",
900
       "110  WSSS_SKN10827900        CTCL  CTCL6     CTCL6_Epi45P_8N_G  Epidermis   \n",
901
       "111  WSSS_SKN10827901        CTCL  CTCL6    CTCL6_Epi45P_8Pr_G  Epidermis   \n",
902
       "112  WSSS_SKN10827902        CTCL  CTCL7       CTCL7_Derm45N_G     Dermis   \n",
903
       "113  WSSS_SKN10827903        CTCL  CTCL7    CTCL7_Derm45P_8N_G     Dermis   \n",
904
       "114  WSSS_SKN10827904        CTCL  CTCL7   CTCL7_Derm45P_8Pr_G     Dermis   \n",
905
       "115  WSSS_SKN10827905        CTCL  CTCL7        CTCL7_Epi45N_G  Epidermis   \n",
906
       "116  WSSS_SKN10827906        CTCL  CTCL7     CTCL7_Epi45P_8N_G  Epidermis   \n",
907
       "117  WSSS_SKN10827907        CTCL  CTCL7    CTCL7_Epi45P_8Pr_G  Epidermis   \n",
908
       "118  WSSS_SKN10827908        CTCL  CTCL8       CTCL8_Derm45N_G     Dermis   \n",
909
       "119  WSSS_SKN10827909        CTCL  CTCL8    CTCL8_Derm45P_8N_G     Dermis   \n",
910
       "120  WSSS_SKN10827910        CTCL  CTCL8   CTCL8_Derm45P_8Pr_G     Dermis   \n",
911
       "121  WSSS_SKN10827911        CTCL  CTCL8        CTCL8_Epi45N_G  Epidermis   \n",
912
       "122  WSSS_SKN10827912        CTCL  CTCL8   CTCL8_Epi45P_POOL_G  Epidermis   \n",
913
       "123       CTCL2_GEX_1        CTCL  CTCL2           CTCL2_GEX_1     Dermis   \n",
914
       "124       CTCL2_GEX_2        CTCL  CTCL2           CTCL2_GEX_2     Dermis   \n",
915
       "125       CTCL2_GEX_3        CTCL  CTCL2           CTCL2_GEX_3     Dermis   \n",
916
       "126       CTCL2_GEX_4        CTCL  CTCL2           CTCL2_GEX_4  Epidermis   \n",
917
       "127       CTCL2_GEX_5        CTCL  CTCL2           CTCL2_GEX_5  Epidermis   \n",
918
       "128       CTCL3_GEX_1        CTCL  CTCL3           CTCL3_GEX_1     Dermis   \n",
919
       "129       CTCL3_GEX_2        CTCL  CTCL3           CTCL3_GEX_2     Dermis   \n",
920
       "130       CTCL3_GEX_3        CTCL  CTCL3           CTCL3_GEX_3     Dermis   \n",
921
       "131       CTCL3_GEX_4        CTCL  CTCL3           CTCL3_GEX_4  Epidermis   \n",
922
       "132       CTCL4_GEX_1        CTCL  CTCL4           CTCL4_GEX_1     Dermis   \n",
923
       "133       CTCL4_GEX_2        CTCL  CTCL4           CTCL4_GEX_2     Dermis   \n",
924
       "134       CTCL4_GEX_3        CTCL  CTCL4           CTCL4_GEX_3  Epidermis   \n",
925
       "135       CTCL4_GEX_4        CTCL  CTCL4           CTCL4_GEX_4  Epidermis   \n",
926
       "\n",
927
       "       Site     Sex                                               path  \n",
928
       "96   lesion  Female  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
929
       "97   lesion  Female  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
930
       "98   lesion  Female  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
931
       "99   lesion  Female  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
932
       "100  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
933
       "101  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
934
       "102  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
935
       "103  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
936
       "104  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
937
       "105  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
938
       "106  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
939
       "107  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
940
       "108  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
941
       "109  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
942
       "110  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
943
       "111  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
944
       "112  lesion  Female  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
945
       "113  lesion  Female  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
946
       "114  lesion  Female  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
947
       "115  lesion  Female  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
948
       "116  lesion  Female  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
949
       "117  lesion  Female  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
950
       "118  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
951
       "119  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
952
       "120  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
953
       "121  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
954
       "122  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
955
       "123  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
956
       "124  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
957
       "125  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
958
       "126  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
959
       "127  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
960
       "128  lesion  Female  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
961
       "129  lesion  Female  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
962
       "130  lesion  Female  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
963
       "131  lesion  Female  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
964
       "132  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
965
       "133  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
966
       "134  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  \n",
967
       "135  lesion    Male  /lustre/scratch127/cellgen/cellgeni/tickets/ti...  "
968
      ]
969
     },
970
     "execution_count": 11,
971
     "metadata": {},
972
     "output_type": "execute_result"
973
    }
974
   ],
975
   "source": [
976
    "sample_filt['path']= path\n",
977
    "sample_filt"
978
   ]
979
  },
980
  {
981
   "cell_type": "code",
982
   "execution_count": 12,
983
   "id": "1948a5d3-d330-4dbb-b916-691a750174e8",
984
   "metadata": {
985
    "tags": []
986
   },
987
   "outputs": [],
988
   "source": [
989
    "# Write the sample table, including the 'path' column assigned in the previous cell, to CSV\n",
    "sample_filt.to_csv('/lustre/scratch126/cellgen/team298/ab72/CTCL/Info_CTCL_processed_with_path_cellbender.csv')"
990
   ]
991
  },
992
  {
993
   "cell_type": "code",
994
   "execution_count": 13,
995
   "id": "0f03aacd-49c9-40a7-924a-91bcf3998e3d",
996
   "metadata": {
997
    "tags": []
998
   },
999
   "outputs": [
1000
    {
1001
     "name": "stdout",
1002
     "output_type": "stream",
1003
     "text": [
1004
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1005
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1006
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1007
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1008
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1009
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1010
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1011
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1012
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1013
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1014
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1015
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1016
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1017
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1018
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1019
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1020
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1021
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1022
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1023
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1024
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1025
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1026
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1027
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1028
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1029
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1030
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1031
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1032
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1033
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1034
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1035
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1036
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1037
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1038
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1039
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1040
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n",
1041
      "--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.\n"
1042
     ]
1043
    },
1044
    {
1045
     "name": "stderr",
1046
     "output_type": "stream",
1047
     "text": [
1048
      "/tmp/ipykernel_524030/1727816446.py:24: FutureWarning: Use anndata.concat instead of AnnData.concatenate, AnnData.concatenate is deprecated and will be removed in the future. See the tutorial for concat at: https://anndata.readthedocs.io/en/latest/concatenation.html\n",
1049
      "  concatenated_adata = adata_list[0].concatenate(adata_list[1:])\n"
1050
     ]
1051
    }
1052
   ],
1053
   "source": [
1054
    "# Collect one AnnData per sample so they can all be merged in a single call\n",
1055
    "adata_list = []\n",
1056
    "\n",
1057
    "# Each row of sample_filt holds one sample's matrix path plus its sample-level metadata\n",
1058
    "for index, row in sample_filt.iterrows():\n",
1059
    "    # Load the 10x-format count matrix stored under this sample's path\n",
1060
    "    adata = sc.read_10x_mtx(row['path'])\n",
1061
    "\n",
1062
    "    # Copy the sample-level metadata onto every barcode of this sample\n",
1063
    "    adata.obs['sample_type'] = row['Sample_type']\n",
1064
    "    adata.obs['Donor'] = row['Donor']\n",
1065
    "    adata.obs['Sanger_ID'] = row['irods/farm']\n",
1066
    "    adata.obs['tissue'] = row['Tissue']\n",
1067
    "    adata.obs['site'] = row['Site']\n",
1068
    "    adata.obs['Sex'] = row['Sex']\n",
1069
    "    \n",
1070
    "    adata.obs_names_make_unique()\n",
1071
    "    adata.var_names_make_unique()\n",
1072
    "    \n",
1073
    "    # Keep this sample for the final concatenation\n",
1074
    "    adata_list.append(adata)\n",
1075
    "\n",
1076
    "# Concatenate all samples. anndata.concat replaces the deprecated AnnData.concatenate; join, label and\n",
    "# index_unique mirror that method's defaults, and merge='same' keeps var columns that agree across samples.\n",
1077
    "concatenated_adata = anndata.concat(adata_list, join='inner', label='batch', index_unique='-', merge='same')\n"
1078
   ]
1079
  },
1080
  {
1081
   "cell_type": "code",
1082
   "execution_count": 13,
1083
   "id": "99f9ee82-1e48-49cf-9d4d-79e3c1596139",
1084
   "metadata": {
1085
    "jp-MarkdownHeadingCollapsed": true,
1086
    "tags": []
1087
   },
1088
   "outputs": [],
1089
   "source": [
1090
    "# Save the concatenated object to disk before any filtering is applied\n",
    "concatenated_adata.write_h5ad('/lustre/scratch126/cellgen/team298/ab72/CTCL/ctcl_cellbender_raw_by_pasha_17_5.h5ad')"
1091
   ]
1092
  },
1093
  {
1094
   "cell_type": "code",
1095
   "execution_count": 14,
1096
   "id": "c53d6227-06fe-4b89-b9c8-d2281d7be75b",
1097
   "metadata": {
1098
    "tags": []
1099
   },
1100
   "outputs": [
1101
    {
1102
     "data": {
1103
      "text/plain": [
1104
       "AnnData object with n_obs × n_vars = 737280 × 36601\n",
1105
       "    obs: 'sample_type', 'Donor', 'Sanger_ID', 'tissue', 'site', 'Sex'\n",
1106
       "    var: 'gene_ids', 'feature_types'"
1107
      ]
1108
     },
1109
     "execution_count": 14,
1110
     "metadata": {},
1111
     "output_type": "execute_result"
1112
    }
1113
   ],
1114
   "source": [
1115
    "# Inspect the combined object; `adata` at this point is only the last sample left over from the loading loop\n",
    "concatenated_adata"
1116
   ]
1117
  },
1118
  {
1119
   "cell_type": "code",
1120
   "execution_count": 121,
1121
   "id": "f18cf14a-50bf-4e29-8896-27c16bc7565a",
1122
   "metadata": {
1123
    "tags": []
1124
   },
1125
   "outputs": [],
1126
   "source": [
1127
    "#adata=sc.read('/lustre/scratch126/cellgen/team298/ab72/CTCL/ctcl_raw_by_pasha_2_5.h5ad')"
1128
   ]
1129
  },
1130
  {
1131
   "cell_type": "code",
1132
   "execution_count": 7,
1133
   "id": "0d2f8c25-18be-4f20-9ec2-7c0aaf0df8c6",
1134
   "metadata": {
1135
    "tags": []
1136
   },
1137
   "outputs": [],
1138
   "source": [
1139
    "# NOTE(review): this loads the *_15_5 snapshot, whereas the cells above write *_17_5, and `adata` is\n",
    "# reassigned from concatenated_adata a few cells below; confirm whether this reload is still needed.\n",
    "adata=sc.read('/lustre/scratch126/cellgen/team298/ab72/CTCL/ctcl_cellbender_raw_by_pasha_15_5.h5ad')"
1140
   ]
1141
  },
1142
  {
1143
   "cell_type": "raw",
1144
   "id": "0048922e-9834-4fab-a5cd-89892c86ec48",
1145
   "metadata": {},
1146
   "source": [
1147
    "Doublet detection"
1148
   ]
1149
  },
1150
  {
1151
   "cell_type": "code",
1152
   "execution_count": 16,
1153
   "id": "e6e631b4-45ed-4227-ad20-63917c479592",
1154
   "metadata": {
1155
    "tags": []
1156
   },
1157
   "outputs": [],
1158
   "source": [
1159
    "# Total counts per barcode. np.asarray(...).ravel() flattens the row sums whether X.sum returns an\n",
    "# np.matrix (scipy sparse matrix) or a plain ndarray (dense X or sparse array), where .A1 would fail.\n",
    "concatenated_adata.obs['n_counts'] = np.asarray(concatenated_adata.X.sum(axis=1)).ravel()"
1160
   ]
1161
  },
1162
  {
1163
   "cell_type": "code",
1164
   "execution_count": 17,
1165
   "id": "c0c15ec6-7183-435f-8f0b-3f80800fe74b",
1166
   "metadata": {
1167
    "tags": []
1168
   },
1169
   "outputs": [],
1170
   "source": [
1171
    "# Keep only barcodes whose total count exceeds 1; .copy() makes adata independent of concatenated_adata\n",
    "adata = concatenated_adata[concatenated_adata.obs['n_counts'].gt(1), :].copy()"
1172
   ]
1173
  },
1174
  {
1175
   "cell_type": "code",
1176
   "execution_count": 19,
1177
   "id": "58ba3e99-8816-4a6c-bc07-86da94a2ad80",
1178
   "metadata": {
1179
    "tags": []
1180
   },
1181
   "outputs": [],
1182
   "source": [
1183
    "# NOTE(review): same path as the earlier concatenated_adata.write_h5ad call, so this filtered object\n",
    "# replaces that file on disk; confirm the overwrite is intended.\n",
    "adata.write_h5ad('/lustre/scratch126/cellgen/team298/ab72/CTCL/ctcl_cellbender_raw_by_pasha_17_5.h5ad')"
1184
   ]
1185
  },
1186
  {
1187
   "cell_type": "code",
1188
   "execution_count": 19,
1189
   "id": "273db793-9ac5-4886-94e6-7ae897f695fd",
1190
   "metadata": {
1191
    "tags": []
1192
   },
1193
   "outputs": [
1194
    {
1195
     "data": {
1196
      "text/plain": [
1197
       "n_counts\n",
1198
       "1558.0     81\n",
1199
       "1235.0     79\n",
1200
       "1648.0     77\n",
1201
       "1376.0     77\n",
1202
       "1646.0     77\n",
1203
       "           ..\n",
1204
       "17368.0     1\n",
1205
       "23866.0     1\n",
1206
       "21090.0     1\n",
1207
       "14756.0     1\n",
1208
       "47844.0     1\n",
1209
       "Name: count, Length: 26190, dtype: int64"
1210
      ]
1211
     },
1212
     "execution_count": 19,
1213
     "metadata": {},
1214
     "output_type": "execute_result"
1215
    }
1216
   ],
1217
   "source": [
1218
    "# Count how many barcodes share each total-count value\n",
    "adata.obs['n_counts'].value_counts()"
1219
   ]
1220
  },
1221
  {
1222
   "cell_type": "code",
1223
   "execution_count": 18,
1224
   "id": "882618be-f572-4423-9878-53ab39f13f6a",
1225
   "metadata": {
1226
    "tags": []
1227
   },
1228
   "outputs": [
1229
    {
1230
     "name": "stderr",
1231
     "output_type": "stream",
1232
     "text": [
1233
      "/tmp/ipykernel_2512786/2340965974.py:1: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual.\n",
1234
      "  adata.obs['Donor']=adata.obs['Donor'].astype(str)\n"
1235
     ]
1236
    }
1237
   ],
1238
   "source": [
1239
    "# Cast both identifiers to plain strings, then join them into one per-lane key: <Donor>_<Sanger_ID>\n",
1240
    "adata.obs = adata.obs.astype({'Donor': str, 'Sanger_ID': str})\n",
1241
    "adata.obs['donor_lane'] = adata.obs['Donor'].str.cat(adata.obs['Sanger_ID'], sep='_')"
1242
   ]
1243
  },
1244
  {
1245
   "cell_type": "code",
1246
   "execution_count": 20,
1247
   "id": "cca3be30-717a-4ce8-bfae-0cf5dbcd6837",
1248
   "metadata": {
1249
    "collapsed": true,
1250
    "jupyter": {
1251
     "outputs_hidden": true
1252
    },
1253
    "tags": []
1254
   },
1255
   "outputs": [
1256
    {
1257
     "name": "stdout",
1258
     "output_type": "stream",
1259
     "text": [
1260
      "WSSS_SKN8090612\n",
1261
      "Preprocessing...\n",
1262
      "Simulating doublets...\n",
1263
      "Embedding transcriptomes using PCA...\n",
1264
      "Calculating doublet scores...\n",
1265
      "Automatically set threshold at doublet score = 0.51\n",
1266
      "Detected doublet rate = 0.4%\n",
1267
      "Estimated detectable doublet fraction = 5.8%\n",
1268
      "Overall doublet rate:\n",
1269
      "\tExpected   = 10.0%\n",
1270
      "\tEstimated  = 6.9%\n",
1271
      "Elapsed time: 14.8 seconds\n",
1272
      "Threshold found by scrublet\n",
1273
      "Detected doublet rate = 2.2%\n",
1274
      "Estimated detectable doublet fraction = 21.7%\n",
1275
      "Overall doublet rate:\n",
1276
      "\tExpected   = 10.0%\n",
1277
      "\tEstimated  = 10.3%\n",
1278
      "\n",
1279
      "\n",
1280
      "WSSS_SKN8090613\n",
1281
      "Preprocessing...\n",
1282
      "Simulating doublets...\n",
1283
      "Embedding transcriptomes using PCA...\n",
1284
      "Calculating doublet scores...\n",
1285
      "Automatically set threshold at doublet score = 0.66\n",
1286
      "Detected doublet rate = 0.2%\n",
1287
      "Estimated detectable doublet fraction = 4.4%\n",
1288
      "Overall doublet rate:\n",
1289
      "\tExpected   = 10.0%\n",
1290
      "\tEstimated  = 4.2%\n",
1291
      "Elapsed time: 3.5 seconds\n",
1292
      "Threshold found by scrublet\n",
1293
      "Detected doublet rate = 0.7%\n",
1294
      "Estimated detectable doublet fraction = 8.4%\n",
1295
      "Overall doublet rate:\n",
1296
      "\tExpected   = 10.0%\n",
1297
      "\tEstimated  = 8.2%\n",
1298
      "\n",
1299
      "\n",
1300
      "WSSS_SKN8090614\n",
1301
      "Preprocessing...\n",
1302
      "Simulating doublets...\n",
1303
      "Embedding transcriptomes using PCA...\n",
1304
      "Calculating doublet scores...\n",
1305
      "Automatically set threshold at doublet score = 0.69\n",
1306
      "Detected doublet rate = 0.1%\n",
1307
      "Estimated detectable doublet fraction = 0.4%\n",
1308
      "Overall doublet rate:\n",
1309
      "\tExpected   = 10.0%\n",
1310
      "\tEstimated  = 13.6%\n",
1311
      "Elapsed time: 12.2 seconds\n",
1312
      "Threshold found by scrublet\n",
1313
      "Detected doublet rate = 0.3%\n",
1314
      "Estimated detectable doublet fraction = 2.8%\n",
1315
      "Overall doublet rate:\n",
1316
      "\tExpected   = 10.0%\n",
1317
      "\tEstimated  = 9.8%\n",
1318
      "\n",
1319
      "\n",
1320
      "WSSS_SKN8090615\n",
1321
      "Preprocessing...\n",
1322
      "Simulating doublets...\n",
1323
      "Embedding transcriptomes using PCA...\n",
1324
      "Calculating doublet scores...\n",
1325
      "Automatically set threshold at doublet score = 0.47\n",
1326
      "Detected doublet rate = 1.1%\n",
1327
      "Estimated detectable doublet fraction = 19.2%\n",
1328
      "Overall doublet rate:\n",
1329
      "\tExpected   = 10.0%\n",
1330
      "\tEstimated  = 5.5%\n",
1331
      "Elapsed time: 10.2 seconds\n",
1332
      "Threshold found by scrublet\n",
1333
      "Detected doublet rate = 4.4%\n",
1334
      "Estimated detectable doublet fraction = 39.7%\n",
1335
      "Overall doublet rate:\n",
1336
      "\tExpected   = 10.0%\n",
1337
      "\tEstimated  = 11.1%\n",
1338
      "\n",
1339
      "\n",
1340
      "WSSS_SKN10827890\n",
1341
      "Preprocessing...\n",
1342
      "Simulating doublets...\n",
1343
      "Embedding transcriptomes using PCA...\n",
1344
      "Calculating doublet scores...\n",
1345
      "Automatically set threshold at doublet score = 0.37\n",
1346
      "Detected doublet rate = 2.3%\n",
1347
      "Estimated detectable doublet fraction = 23.4%\n",
1348
      "Overall doublet rate:\n",
1349
      "\tExpected   = 10.0%\n",
1350
      "\tEstimated  = 9.8%\n",
1351
      "Elapsed time: 17.9 seconds\n",
1352
      "Threshold found by scrublet\n",
1353
      "Detected doublet rate = 4.3%\n",
1354
      "Estimated detectable doublet fraction = 33.4%\n",
1355
      "Overall doublet rate:\n",
1356
      "\tExpected   = 10.0%\n",
1357
      "\tEstimated  = 12.9%\n",
1358
      "\n",
1359
      "\n",
1360
      "WSSS_SKN10827891\n",
1361
      "Preprocessing...\n",
1362
      "Simulating doublets...\n",
1363
      "Embedding transcriptomes using PCA...\n",
1364
      "Calculating doublet scores...\n",
1365
      "Automatically set threshold at doublet score = 0.76\n",
1366
      "Detected doublet rate = 0.0%\n",
1367
      "Estimated detectable doublet fraction = 0.2%\n",
1368
      "Overall doublet rate:\n",
1369
      "\tExpected   = 10.0%\n",
1370
      "\tEstimated  = 0.0%\n",
1371
      "Elapsed time: 10.1 seconds\n",
1372
      "Threshold found by scrublet\n",
1373
      "Detected doublet rate = 0.7%\n",
1374
      "Estimated detectable doublet fraction = 7.2%\n",
1375
      "Overall doublet rate:\n",
1376
      "\tExpected   = 10.0%\n",
1377
      "\tEstimated  = 9.1%\n",
1378
      "\n",
1379
      "\n",
1380
      "WSSS_SKN10827892\n",
1381
      "Preprocessing...\n",
1382
      "Simulating doublets...\n",
1383
      "Embedding transcriptomes using PCA...\n",
1384
      "Calculating doublet scores...\n",
1385
      "Automatically set threshold at doublet score = 0.54\n",
1386
      "Detected doublet rate = 0.2%\n",
1387
      "Estimated detectable doublet fraction = 3.8%\n",
1388
      "Overall doublet rate:\n",
1389
      "\tExpected   = 10.0%\n",
1390
      "\tEstimated  = 6.4%\n",
1391
      "Elapsed time: 12.2 seconds\n",
1392
      "Threshold found by scrublet\n",
1393
      "Detected doublet rate = 1.0%\n",
1394
      "Estimated detectable doublet fraction = 13.9%\n",
1395
      "Overall doublet rate:\n",
1396
      "\tExpected   = 10.0%\n",
1397
      "\tEstimated  = 7.4%\n",
1398
      "\n",
1399
      "\n",
1400
      "WSSS_SKN10827893\n",
1401
      "Preprocessing...\n",
1402
      "Simulating doublets...\n",
1403
      "Embedding transcriptomes using PCA...\n",
1404
      "Calculating doublet scores...\n",
1405
      "Automatically set threshold at doublet score = 0.67\n",
1406
      "Detected doublet rate = 0.0%\n",
1407
      "Estimated detectable doublet fraction = 0.5%\n",
1408
      "Overall doublet rate:\n",
1409
      "\tExpected   = 10.0%\n",
1410
      "\tEstimated  = 6.7%\n",
1411
      "Elapsed time: 21.6 seconds\n",
1412
      "Threshold found by scrublet\n",
1413
      "Detected doublet rate = 2.3%\n",
1414
      "Estimated detectable doublet fraction = 14.4%\n",
1415
      "Overall doublet rate:\n",
1416
      "\tExpected   = 10.0%\n",
1417
      "\tEstimated  = 15.9%\n",
1418
      "\n",
1419
      "\n",
1420
      "WSSS_SKN10827894\n",
1421
      "Preprocessing...\n",
1422
      "Simulating doublets...\n",
1423
      "Embedding transcriptomes using PCA...\n",
1424
      "Calculating doublet scores...\n",
1425
      "Automatically set threshold at doublet score = 0.37\n",
1426
      "Detected doublet rate = 1.3%\n",
1427
      "Estimated detectable doublet fraction = 24.3%\n",
1428
      "Overall doublet rate:\n",
1429
      "\tExpected   = 10.0%\n",
1430
      "\tEstimated  = 5.4%\n",
1431
      "Elapsed time: 8.9 seconds\n",
1432
      "Threshold found by scrublet\n",
1433
      "Detected doublet rate = 3.7%\n",
1434
      "Estimated detectable doublet fraction = 41.6%\n",
1435
      "Overall doublet rate:\n",
1436
      "\tExpected   = 10.0%\n",
1437
      "\tEstimated  = 8.9%\n",
1438
      "\n",
1439
      "\n",
1440
      "WSSS_SKN10827895\n",
1441
      "Preprocessing...\n",
1442
      "Simulating doublets...\n",
1443
      "Embedding transcriptomes using PCA...\n",
1444
      "Calculating doublet scores...\n",
1445
      "Automatically set threshold at doublet score = 0.40\n",
1446
      "Detected doublet rate = 2.1%\n",
1447
      "Estimated detectable doublet fraction = 22.2%\n",
1448
      "Overall doublet rate:\n",
1449
      "\tExpected   = 10.0%\n",
1450
      "\tEstimated  = 9.4%\n",
1451
      "Elapsed time: 18.9 seconds\n",
1452
      "Threshold found by scrublet\n",
1453
      "Detected doublet rate = 2.8%\n",
1454
      "Estimated detectable doublet fraction = 26.3%\n",
1455
      "Overall doublet rate:\n",
1456
      "\tExpected   = 10.0%\n",
1457
      "\tEstimated  = 10.5%\n",
1458
      "\n",
1459
      "\n",
1460
      "WSSS_SKN10827896\n",
1461
      "Preprocessing...\n",
1462
      "Simulating doublets...\n",
1463
      "Embedding transcriptomes using PCA...\n",
1464
      "Calculating doublet scores...\n",
1465
      "Automatically set threshold at doublet score = 0.41\n",
1466
      "Detected doublet rate = 2.0%\n",
1467
      "Estimated detectable doublet fraction = 17.5%\n",
1468
      "Overall doublet rate:\n",
1469
      "\tExpected   = 10.0%\n",
1470
      "\tEstimated  = 11.6%\n",
1471
      "Elapsed time: 32.4 seconds\n",
1472
      "Threshold found by scrublet\n",
1473
      "Detected doublet rate = 3.8%\n",
1474
      "Estimated detectable doublet fraction = 24.5%\n",
1475
      "Overall doublet rate:\n",
1476
      "\tExpected   = 10.0%\n",
1477
      "\tEstimated  = 15.5%\n",
1478
      "\n",
1479
      "\n",
1480
      "WSSS_SKN10827897\n",
1481
      "Preprocessing...\n",
1482
      "Simulating doublets...\n",
1483
      "Embedding transcriptomes using PCA...\n",
1484
      "Calculating doublet scores...\n",
1485
      "Automatically set threshold at doublet score = 0.79\n",
1486
      "Detected doublet rate = 0.0%\n",
1487
      "Estimated detectable doublet fraction = 0.1%\n",
1488
      "Overall doublet rate:\n",
1489
      "\tExpected   = 10.0%\n",
1490
      "\tEstimated  = 16.7%\n",
1491
      "Elapsed time: 22.4 seconds\n",
1492
      "Threshold found by scrublet\n",
1493
      "Detected doublet rate = 0.4%\n",
1494
      "Estimated detectable doublet fraction = 3.9%\n",
1495
      "Overall doublet rate:\n",
1496
      "\tExpected   = 10.0%\n",
1497
      "\tEstimated  = 9.4%\n",
1498
      "\n",
1499
      "\n",
1500
      "WSSS_SKN10827898\n",
1501
      "Preprocessing...\n",
1502
      "Simulating doublets...\n",
1503
      "Embedding transcriptomes using PCA...\n",
1504
      "Calculating doublet scores...\n",
1505
      "Automatically set threshold at doublet score = 0.70\n",
1506
      "Detected doublet rate = 0.0%\n",
1507
      "Estimated detectable doublet fraction = 0.3%\n",
1508
      "Overall doublet rate:\n",
1509
      "\tExpected   = 10.0%\n",
1510
      "\tEstimated  = 6.8%\n",
1511
      "Elapsed time: 18.0 seconds\n",
1512
      "Threshold found by scrublet\n",
1513
      "Detected doublet rate = 1.0%\n",
1514
      "Estimated detectable doublet fraction = 10.1%\n",
1515
      "Overall doublet rate:\n",
1516
      "\tExpected   = 10.0%\n",
1517
      "\tEstimated  = 9.6%\n",
1518
      "\n",
1519
      "\n",
1520
      "WSSS_SKN10827899\n",
1521
      "Preprocessing...\n",
1522
      "Simulating doublets...\n",
1523
      "Embedding transcriptomes using PCA...\n",
1524
      "Calculating doublet scores...\n",
1525
      "Automatically set threshold at doublet score = 0.43\n",
1526
      "Detected doublet rate = 0.7%\n",
1527
      "Estimated detectable doublet fraction = 14.9%\n",
1528
      "Overall doublet rate:\n",
1529
      "\tExpected   = 10.0%\n",
1530
      "\tEstimated  = 4.8%\n",
1531
      "Elapsed time: 5.1 seconds\n",
1532
      "Threshold found by scrublet\n",
1533
      "Detected doublet rate = 2.8%\n",
1534
      "Estimated detectable doublet fraction = 25.8%\n",
1535
      "Overall doublet rate:\n",
1536
      "\tExpected   = 10.0%\n",
1537
      "\tEstimated  = 11.0%\n",
1538
      "\n",
1539
      "\n",
1540
      "WSSS_SKN10827900\n",
1541
      "Preprocessing...\n",
1542
      "Simulating doublets...\n",
1543
      "Embedding transcriptomes using PCA...\n",
1544
      "Calculating doublet scores...\n",
1545
      "Automatically set threshold at doublet score = 0.65\n",
1546
      "Detected doublet rate = 0.4%\n",
1547
      "Estimated detectable doublet fraction = 4.7%\n",
1548
      "Overall doublet rate:\n",
1549
      "\tExpected   = 10.0%\n",
1550
      "\tEstimated  = 9.2%\n",
1551
      "Elapsed time: 4.6 seconds\n",
1552
      "Threshold found by scrublet\n",
1553
      "Detected doublet rate = 1.7%\n",
1554
      "Estimated detectable doublet fraction = 17.0%\n",
1555
      "Overall doublet rate:\n",
1556
      "\tExpected   = 10.0%\n",
1557
      "\tEstimated  = 10.2%\n",
1558
      "\n",
1559
      "\n",
1560
      "WSSS_SKN10827901\n",
1561
      "Preprocessing...\n",
1562
      "Simulating doublets...\n",
1563
      "Embedding transcriptomes using PCA...\n",
1564
      "Calculating doublet scores...\n",
1565
      "Automatically set threshold at doublet score = 0.82\n",
1566
      "Detected doublet rate = 0.0%\n",
1567
      "Estimated detectable doublet fraction = 0.0%\n",
1568
      "Overall doublet rate:\n",
1569
      "\tExpected   = 10.0%\n",
1570
      "\tEstimated  = 0.0%\n",
1571
      "Elapsed time: 38.9 seconds\n",
1572
      "Threshold found by scrublet\n",
1573
      "Detected doublet rate = 2.1%\n",
1574
      "Estimated detectable doublet fraction = 15.2%\n",
1575
      "Overall doublet rate:\n",
1576
      "\tExpected   = 10.0%\n",
1577
      "\tEstimated  = 13.5%\n",
1578
      "\n",
1579
      "\n",
1580
      "WSSS_SKN10827902\n",
1581
      "Preprocessing...\n",
1582
      "Simulating doublets...\n",
1583
      "Embedding transcriptomes using PCA...\n",
1584
      "Calculating doublet scores...\n",
1585
      "Automatically set threshold at doublet score = 0.30\n",
1586
      "Detected doublet rate = 4.1%\n",
1587
      "Estimated detectable doublet fraction = 33.7%\n",
1588
      "Overall doublet rate:\n",
1589
      "\tExpected   = 10.0%\n",
1590
      "\tEstimated  = 12.2%\n",
1591
      "Elapsed time: 36.2 seconds\n",
1592
      "Threshold found by scrublet\n",
1593
      "Detected doublet rate = 8.1%\n",
1594
      "Estimated detectable doublet fraction = 50.8%\n",
1595
      "Overall doublet rate:\n",
1596
      "\tExpected   = 10.0%\n",
1597
      "\tEstimated  = 15.9%\n",
1598
      "\n",
1599
      "\n",
1600
      "WSSS_SKN10827903\n",
1601
      "Preprocessing...\n",
1602
      "Simulating doublets...\n",
1603
      "Embedding transcriptomes using PCA...\n",
1604
      "Calculating doublet scores...\n",
1605
      "Automatically set threshold at doublet score = 0.60\n",
1606
      "Detected doublet rate = 0.0%\n",
1607
      "Estimated detectable doublet fraction = 0.0%\n",
1608
      "Overall doublet rate:\n",
1609
      "\tExpected   = 10.0%\n",
1610
      "\tEstimated  = 0.0%\n",
1611
      "Elapsed time: 120.3 seconds\n",
1612
      "Threshold found by scrublet\n",
1613
      "Detected doublet rate = 0.0%\n",
1614
      "Estimated detectable doublet fraction = 0.4%\n",
1615
      "Overall doublet rate:\n",
1616
      "\tExpected   = 10.0%\n",
1617
      "\tEstimated  = 11.4%\n",
1618
      "\n",
1619
      "\n",
1620
      "WSSS_SKN10827904\n",
1621
      "Preprocessing...\n",
1622
      "Simulating doublets...\n",
1623
      "Embedding transcriptomes using PCA...\n",
1624
      "Calculating doublet scores...\n",
1625
      "Automatically set threshold at doublet score = 0.26\n",
1626
      "Detected doublet rate = 8.3%\n",
1627
      "Estimated detectable doublet fraction = 35.8%\n",
1628
      "Overall doublet rate:\n",
1629
      "\tExpected   = 10.0%\n",
1630
      "\tEstimated  = 23.1%\n",
1631
      "Elapsed time: 66.1 seconds\n",
1632
      "Threshold found by scrublet\n",
1633
      "Detected doublet rate = 1.1%\n",
1634
      "Estimated detectable doublet fraction = 7.5%\n",
1635
      "Overall doublet rate:\n",
1636
      "\tExpected   = 10.0%\n",
1637
      "\tEstimated  = 14.3%\n",
1638
      "\n",
1639
      "\n",
1640
      "WSSS_SKN10827905\n",
1641
      "Preprocessing...\n",
1642
      "Simulating doublets...\n",
1643
      "Embedding transcriptomes using PCA...\n",
1644
      "Calculating doublet scores...\n",
1645
      "Automatically set threshold at doublet score = 0.40\n",
1646
      "Detected doublet rate = 0.7%\n",
1647
      "Estimated detectable doublet fraction = 18.7%\n",
1648
      "Overall doublet rate:\n",
1649
      "\tExpected   = 10.0%\n",
1650
      "\tEstimated  = 3.5%\n",
1651
      "Elapsed time: 2.7 seconds\n",
1652
      "Threshold found by scrublet\n",
1653
      "Detected doublet rate = 2.3%\n",
1654
      "Estimated detectable doublet fraction = 29.3%\n",
1655
      "Overall doublet rate:\n",
1656
      "\tExpected   = 10.0%\n",
1657
      "\tEstimated  = 8.0%\n",
1658
      "\n",
1659
      "\n",
1660
      "WSSS_SKN10827906\n",
1661
      "Preprocessing...\n",
1662
      "Simulating doublets...\n",
1663
      "Embedding transcriptomes using PCA...\n",
1664
      "Calculating doublet scores...\n",
1665
      "Automatically set threshold at doublet score = 0.43\n",
1666
      "Detected doublet rate = 2.6%\n",
1667
      "Estimated detectable doublet fraction = 28.6%\n",
1668
      "Overall doublet rate:\n",
1669
      "\tExpected   = 10.0%\n",
1670
      "\tEstimated  = 9.2%\n",
1671
      "Elapsed time: 8.4 seconds\n",
1672
      "Threshold found by scrublet\n",
1673
      "Detected doublet rate = 3.2%\n",
1674
      "Estimated detectable doublet fraction = 31.8%\n",
1675
      "Overall doublet rate:\n",
1676
      "\tExpected   = 10.0%\n",
1677
      "\tEstimated  = 10.2%\n",
1678
      "\n",
1679
      "\n",
1680
      "WSSS_SKN10827907\n",
1681
      "Preprocessing...\n",
1682
      "Simulating doublets...\n",
1683
      "Embedding transcriptomes using PCA...\n",
1684
      "Calculating doublet scores...\n",
1685
      "Automatically set threshold at doublet score = 0.65\n",
1686
      "Detected doublet rate = 0.2%\n",
1687
      "Estimated detectable doublet fraction = 5.2%\n",
1688
      "Overall doublet rate:\n",
1689
      "\tExpected   = 10.0%\n",
1690
      "\tEstimated  = 3.9%\n",
1691
      "Elapsed time: 7.5 seconds\n",
1692
      "Threshold found by scrublet\n",
1693
      "Detected doublet rate = 3.7%\n",
1694
      "Estimated detectable doublet fraction = 34.2%\n",
1695
      "Overall doublet rate:\n",
1696
      "\tExpected   = 10.0%\n",
1697
      "\tEstimated  = 10.8%\n",
1698
      "\n",
1699
      "\n",
1700
      "WSSS_SKN10827908\n",
1701
      "Preprocessing...\n",
1702
      "Simulating doublets...\n",
1703
      "Embedding transcriptomes using PCA...\n",
1704
      "Calculating doublet scores...\n",
1705
      "Automatically set threshold at doublet score = 0.38\n",
1706
      "Detected doublet rate = 3.0%\n",
1707
      "Estimated detectable doublet fraction = 20.9%\n",
1708
      "Overall doublet rate:\n",
1709
      "\tExpected   = 10.0%\n",
1710
      "\tEstimated  = 14.4%\n",
1711
      "Elapsed time: 35.9 seconds\n",
1712
      "Threshold found by scrublet\n",
1713
      "Detected doublet rate = 4.0%\n",
1714
      "Estimated detectable doublet fraction = 25.4%\n",
1715
      "Overall doublet rate:\n",
1716
      "\tExpected   = 10.0%\n",
1717
      "\tEstimated  = 15.7%\n",
1718
      "\n",
1719
      "\n",
1720
      "WSSS_SKN10827909\n",
1721
      "Preprocessing...\n",
1722
      "Simulating doublets...\n",
1723
      "Embedding transcriptomes using PCA...\n",
1724
      "Calculating doublet scores...\n",
1725
      "Automatically set threshold at doublet score = 0.68\n",
1726
      "Detected doublet rate = 0.0%\n",
1727
      "Estimated detectable doublet fraction = 0.2%\n",
1728
      "Overall doublet rate:\n",
1729
      "\tExpected   = 10.0%\n",
1730
      "\tEstimated  = 11.1%\n",
1731
      "Elapsed time: 29.5 seconds\n",
1732
      "Threshold found by scrublet\n",
1733
      "Detected doublet rate = 0.4%\n",
1734
      "Estimated detectable doublet fraction = 2.7%\n",
1735
      "Overall doublet rate:\n",
1736
      "\tExpected   = 10.0%\n",
1737
      "\tEstimated  = 15.0%\n",
1738
      "\n",
1739
      "\n",
1740
      "WSSS_SKN10827910\n",
1741
      "Preprocessing...\n",
1742
      "Simulating doublets...\n",
1743
      "Embedding transcriptomes using PCA...\n",
1744
      "Calculating doublet scores...\n",
1745
      "Automatically set threshold at doublet score = 0.66\n",
1746
      "Detected doublet rate = 0.0%\n",
1747
      "Estimated detectable doublet fraction = 0.2%\n",
1748
      "Overall doublet rate:\n",
1749
      "\tExpected   = 10.0%\n",
1750
      "\tEstimated  = 13.0%\n",
1751
      "Elapsed time: 49.5 seconds\n",
1752
      "Threshold found by scrublet\n",
1753
      "Detected doublet rate = 1.7%\n",
1754
      "Estimated detectable doublet fraction = 12.8%\n",
1755
      "Overall doublet rate:\n",
1756
      "\tExpected   = 10.0%\n",
1757
      "\tEstimated  = 13.3%\n",
1758
      "\n",
1759
      "\n",
1760
      "WSSS_SKN10827911\n",
1761
      "Preprocessing...\n",
1762
      "Simulating doublets...\n",
1763
      "Embedding transcriptomes using PCA...\n",
1764
      "Calculating doublet scores...\n",
1765
      "Automatically set threshold at doublet score = 0.74\n",
1766
      "Detected doublet rate = 0.0%\n",
1767
      "Estimated detectable doublet fraction = 1.1%\n",
1768
      "Overall doublet rate:\n",
1769
      "\tExpected   = 10.0%\n",
1770
      "\tEstimated  = 1.6%\n",
1771
      "Elapsed time: 19.7 seconds\n",
1772
      "Threshold found by scrublet\n",
1773
      "Detected doublet rate = 1.9%\n",
1774
      "Estimated detectable doublet fraction = 17.5%\n",
1775
      "Overall doublet rate:\n",
1776
      "\tExpected   = 10.0%\n",
1777
      "\tEstimated  = 10.9%\n",
1778
      "\n",
1779
      "\n",
1780
      "WSSS_SKN10827912\n",
1781
      "Preprocessing...\n",
1782
      "Simulating doublets...\n",
1783
      "Embedding transcriptomes using PCA...\n",
1784
      "Calculating doublet scores...\n",
1785
      "Automatically set threshold at doublet score = 0.61\n",
1786
      "Detected doublet rate = 0.7%\n",
1787
      "Estimated detectable doublet fraction = 10.1%\n",
1788
      "Overall doublet rate:\n",
1789
      "\tExpected   = 10.0%\n",
1790
      "\tEstimated  = 7.3%\n",
1791
      "Elapsed time: 17.8 seconds\n",
1792
      "Threshold found by scrublet\n",
1793
      "Detected doublet rate = 4.9%\n",
1794
      "Estimated detectable doublet fraction = 33.4%\n",
1795
      "Overall doublet rate:\n",
1796
      "\tExpected   = 10.0%\n",
1797
      "\tEstimated  = 14.7%\n",
1798
      "\n",
1799
      "\n",
1800
      "CTCL2_GEX_1\n",
1801
      "Preprocessing...\n",
1802
      "Simulating doublets...\n",
1803
      "Embedding transcriptomes using PCA...\n",
1804
      "Calculating doublet scores...\n",
1805
      "Automatically set threshold at doublet score = 0.60\n",
1806
      "Detected doublet rate = 0.5%\n",
1807
      "Estimated detectable doublet fraction = 18.3%\n",
1808
      "Overall doublet rate:\n",
1809
      "\tExpected   = 10.0%\n",
1810
      "\tEstimated  = 2.7%\n",
1811
      "Elapsed time: 5.4 seconds\n",
1812
      "Threshold found by scrublet\n",
1813
      "Detected doublet rate = 2.2%\n",
1814
      "Estimated detectable doublet fraction = 31.7%\n",
1815
      "Overall doublet rate:\n",
1816
      "\tExpected   = 10.0%\n",
1817
      "\tEstimated  = 7.0%\n",
1818
      "\n",
1819
      "\n",
1820
      "CTCL2_GEX_2\n",
1821
      "Preprocessing...\n",
1822
      "Simulating doublets...\n",
1823
      "Embedding transcriptomes using PCA...\n",
1824
      "Calculating doublet scores...\n",
1825
      "Automatically set threshold at doublet score = 0.62\n",
1826
      "Detected doublet rate = 0.2%\n",
1827
      "Estimated detectable doublet fraction = 4.5%\n",
1828
      "Overall doublet rate:\n",
1829
      "\tExpected   = 10.0%\n",
1830
      "\tEstimated  = 4.8%\n",
1831
      "Elapsed time: 6.1 seconds\n",
1832
      "Threshold found by scrublet\n",
1833
      "Detected doublet rate = 0.2%\n",
1834
      "Estimated detectable doublet fraction = 4.5%\n",
1835
      "Overall doublet rate:\n",
1836
      "\tExpected   = 10.0%\n",
1837
      "\tEstimated  = 4.8%\n",
1838
      "\n",
1839
      "\n",
1840
      "CTCL2_GEX_3\n",
1841
      "Preprocessing...\n",
1842
      "Simulating doublets...\n",
1843
      "Embedding transcriptomes using PCA...\n",
1844
      "Calculating doublet scores...\n",
1845
      "Automatically set threshold at doublet score = 0.47\n",
1846
      "Detected doublet rate = 0.5%\n",
1847
      "Estimated detectable doublet fraction = 16.1%\n",
1848
      "Overall doublet rate:\n",
1849
      "\tExpected   = 10.0%\n",
1850
      "\tEstimated  = 3.1%\n",
1851
      "Elapsed time: 6.8 seconds\n",
1852
      "Threshold found by scrublet\n",
1853
      "Detected doublet rate = 5.8%\n",
1854
      "Estimated detectable doublet fraction = 46.1%\n",
1855
      "Overall doublet rate:\n",
1856
      "\tExpected   = 10.0%\n",
1857
      "\tEstimated  = 12.7%\n",
1858
      "\n",
1859
      "\n",
1860
      "CTCL2_GEX_4\n",
1861
      "Preprocessing...\n",
1862
      "Simulating doublets...\n",
1863
      "Embedding transcriptomes using PCA...\n",
1864
      "Calculating doublet scores...\n",
1865
      "Automatically set threshold at doublet score = 0.40\n",
1866
      "Detected doublet rate = 1.0%\n",
1867
      "Estimated detectable doublet fraction = 26.8%\n",
1868
      "Overall doublet rate:\n",
1869
      "\tExpected   = 10.0%\n",
1870
      "\tEstimated  = 3.5%\n",
1871
      "Elapsed time: 21.4 seconds\n",
1872
      "Threshold found by scrublet\n",
1873
      "Detected doublet rate = 2.5%\n",
1874
      "Estimated detectable doublet fraction = 39.1%\n",
1875
      "Overall doublet rate:\n",
1876
      "\tExpected   = 10.0%\n",
1877
      "\tEstimated  = 6.3%\n",
1878
      "\n",
1879
      "\n",
1880
      "CTCL2_GEX_5\n",
1881
      "Preprocessing...\n",
1882
      "Simulating doublets...\n",
1883
      "Embedding transcriptomes using PCA...\n",
1884
      "Calculating doublet scores...\n",
1885
      "Automatically set threshold at doublet score = 0.41\n",
1886
      "Detected doublet rate = 1.0%\n",
1887
      "Estimated detectable doublet fraction = 19.4%\n",
1888
      "Overall doublet rate:\n",
1889
      "\tExpected   = 10.0%\n",
1890
      "\tEstimated  = 5.3%\n",
1891
      "Elapsed time: 27.2 seconds\n",
1892
      "Threshold found by scrublet\n",
1893
      "Detected doublet rate = 2.4%\n",
1894
      "Estimated detectable doublet fraction = 29.7%\n",
1895
      "Overall doublet rate:\n",
1896
      "\tExpected   = 10.0%\n",
1897
      "\tEstimated  = 8.0%\n",
1898
      "\n",
1899
      "\n",
1900
      "CTCL3_GEX_1\n",
1901
      "Preprocessing...\n",
1902
      "Simulating doublets...\n",
1903
      "Embedding transcriptomes using PCA...\n",
1904
      "Calculating doublet scores...\n",
1905
      "Automatically set threshold at doublet score = 0.42\n",
1906
      "Detected doublet rate = 1.1%\n",
1907
      "Estimated detectable doublet fraction = 15.5%\n",
1908
      "Overall doublet rate:\n",
1909
      "\tExpected   = 10.0%\n",
1910
      "\tEstimated  = 6.9%\n",
1911
      "Elapsed time: 28.1 seconds\n",
1912
      "Threshold found by scrublet\n",
1913
      "Detected doublet rate = 4.4%\n",
1914
      "Estimated detectable doublet fraction = 36.5%\n",
1915
      "Overall doublet rate:\n",
1916
      "\tExpected   = 10.0%\n",
1917
      "\tEstimated  = 12.0%\n",
1918
      "\n",
1919
      "\n",
1920
      "CTCL3_GEX_2\n",
1921
      "Preprocessing...\n",
1922
      "Simulating doublets...\n",
1923
      "Embedding transcriptomes using PCA...\n",
1924
      "Calculating doublet scores...\n",
1925
      "Automatically set threshold at doublet score = 0.44\n",
1926
      "Detected doublet rate = 1.7%\n",
1927
      "Estimated detectable doublet fraction = 24.3%\n",
1928
      "Overall doublet rate:\n",
1929
      "\tExpected   = 10.0%\n",
1930
      "\tEstimated  = 7.2%\n",
1931
      "Elapsed time: 15.4 seconds\n",
1932
      "Threshold found by scrublet\n",
1933
      "Detected doublet rate = 4.4%\n",
1934
      "Estimated detectable doublet fraction = 39.9%\n",
1935
      "Overall doublet rate:\n",
1936
      "\tExpected   = 10.0%\n",
1937
      "\tEstimated  = 11.0%\n",
1938
      "\n",
1939
      "\n",
1940
      "CTCL3_GEX_3\n",
1941
      "Preprocessing...\n",
1942
      "Simulating doublets...\n",
1943
      "Embedding transcriptomes using PCA...\n",
1944
      "Calculating doublet scores...\n",
1945
      "Automatically set threshold at doublet score = 0.44\n",
1946
      "Detected doublet rate = 1.6%\n",
1947
      "Estimated detectable doublet fraction = 23.4%\n",
1948
      "Overall doublet rate:\n",
1949
      "\tExpected   = 10.0%\n",
1950
      "\tEstimated  = 6.9%\n",
1951
      "Elapsed time: 12.7 seconds\n",
1952
      "Threshold found by scrublet\n",
1953
      "Detected doublet rate = 3.6%\n",
1954
      "Estimated detectable doublet fraction = 35.5%\n",
1955
      "Overall doublet rate:\n",
1956
      "\tExpected   = 10.0%\n",
1957
      "\tEstimated  = 10.3%\n",
1958
      "\n",
1959
      "\n",
1960
      "CTCL3_GEX_4\n",
1961
      "Preprocessing...\n",
1962
      "Simulating doublets...\n",
1963
      "Embedding transcriptomes using PCA...\n",
1964
      "Calculating doublet scores...\n",
1965
      "Automatically set threshold at doublet score = 0.42\n",
1966
      "Detected doublet rate = 2.1%\n",
1967
      "Estimated detectable doublet fraction = 35.9%\n",
1968
      "Overall doublet rate:\n",
1969
      "\tExpected   = 10.0%\n",
1970
      "\tEstimated  = 5.7%\n",
1971
      "Elapsed time: 9.5 seconds\n",
1972
      "Threshold found by scrublet\n",
1973
      "Detected doublet rate = 2.5%\n",
1974
      "Estimated detectable doublet fraction = 39.6%\n",
1975
      "Overall doublet rate:\n",
1976
      "\tExpected   = 10.0%\n",
1977
      "\tEstimated  = 6.2%\n",
1978
      "\n",
1979
      "\n",
1980
      "CTCL4_GEX_1\n",
1981
      "Preprocessing...\n",
1982
      "Simulating doublets...\n",
1983
      "Embedding transcriptomes using PCA...\n",
1984
      "Calculating doublet scores...\n",
1985
      "Automatically set threshold at doublet score = 0.78\n",
1986
      "Detected doublet rate = 0.0%\n",
1987
      "Estimated detectable doublet fraction = 0.2%\n",
1988
      "Overall doublet rate:\n",
1989
      "\tExpected   = 10.0%\n",
1990
      "\tEstimated  = 0.0%\n",
1991
      "Elapsed time: 19.6 seconds\n",
1992
      "Threshold found by scrublet\n",
1993
      "Detected doublet rate = 1.2%\n",
1994
      "Estimated detectable doublet fraction = 12.8%\n",
1995
      "Overall doublet rate:\n",
1996
      "\tExpected   = 10.0%\n",
1997
      "\tEstimated  = 9.0%\n",
1998
      "\n",
1999
      "\n",
2000
      "CTCL4_GEX_2\n",
2001
      "Preprocessing...\n",
2002
      "Simulating doublets...\n",
2003
      "Embedding transcriptomes using PCA...\n",
2004
      "Calculating doublet scores...\n",
2005
      "Automatically set threshold at doublet score = 0.49\n",
2006
      "Detected doublet rate = 0.6%\n",
2007
      "Estimated detectable doublet fraction = 7.9%\n",
2008
      "Overall doublet rate:\n",
2009
      "\tExpected   = 10.0%\n",
2010
      "\tEstimated  = 7.6%\n",
2011
      "Elapsed time: 18.7 seconds\n",
2012
      "Threshold found by scrublet\n",
2013
      "Detected doublet rate = 1.2%\n",
2014
      "Estimated detectable doublet fraction = 13.1%\n",
2015
      "Overall doublet rate:\n",
2016
      "\tExpected   = 10.0%\n",
2017
      "\tEstimated  = 9.5%\n",
2018
      "\n",
2019
      "\n",
2020
      "CTCL4_GEX_3\n",
2021
      "Preprocessing...\n",
2022
      "Simulating doublets...\n",
2023
      "Embedding transcriptomes using PCA...\n",
2024
      "Calculating doublet scores...\n",
2025
      "Automatically set threshold at doublet score = 0.77\n",
2026
      "Detected doublet rate = 0.0%\n",
2027
      "Estimated detectable doublet fraction = 0.1%\n",
2028
      "Overall doublet rate:\n",
2029
      "\tExpected   = 10.0%\n",
2030
      "\tEstimated  = 0.0%\n",
2031
      "Elapsed time: 16.2 seconds\n",
2032
      "Threshold found by scrublet\n",
2033
      "Detected doublet rate = 0.8%\n",
2034
      "Estimated detectable doublet fraction = 6.8%\n",
2035
      "Overall doublet rate:\n",
2036
      "\tExpected   = 10.0%\n",
2037
      "\tEstimated  = 11.7%\n",
2038
      "\n",
2039
      "\n",
2040
      "CTCL4_GEX_4\n",
2041
      "Preprocessing...\n",
2042
      "Simulating doublets...\n",
2043
      "Embedding transcriptomes using PCA...\n",
2044
      "Calculating doublet scores...\n",
2045
      "Automatically set threshold at doublet score = 0.34\n",
2046
      "Detected doublet rate = 2.1%\n",
2047
      "Estimated detectable doublet fraction = 23.1%\n",
2048
      "Overall doublet rate:\n",
2049
      "\tExpected   = 10.0%\n",
2050
      "\tEstimated  = 9.1%\n",
2051
      "Elapsed time: 14.9 seconds\n",
2052
      "Threshold found by scrublet\n",
2053
      "Detected doublet rate = 3.4%\n",
2054
      "Estimated detectable doublet fraction = 28.2%\n",
2055
      "Overall doublet rate:\n",
2056
      "\tExpected   = 10.0%\n",
2057
      "\tEstimated  = 12.0%\n",
2058
      "\n",
2059
      "\n"
2060
     ]
2061
    }
2062
   ],
2063
   "source": [
2064
    "RUNs, DSs, CELLs, THRs, MEDs, MADs, CUTs, no_thr = [], [], [], [], [], [], [], []\n",
2065
    "\n",
2066
    "for run in adata.obs['Sanger_ID'].unique():\n",
2067
    "    print(run)\n",
2068
    "    ad = adata[adata.obs['Sanger_ID'] == run, :]\n",
2069
    "    x = ad.X\n",
2070
    "    scrub = scr.Scrublet(x)\n",
2071
    "    ds, prd = scrub.scrub_doublets()\n",
2072
    "    RUNs.append(run)\n",
2073
    "    DSs.append(ds)\n",
2074
    "    CELLs.append(ad.obs_names)\n",
2075
    "    # MAD calculation of threshold:\n",
2076
    "    MED = np.median(ds)\n",
2077
    "    MAD = robust.mad(ds)\n",
2078
    "    CUT = (MED + (4 * MAD))\n",
2079
    "    MEDs.append(MED)\n",
2080
    "    MADs.append(MAD)\n",
2081
    "    CUTs.append(CUT)\n",
2082
    "\n",
2083
    "    try:  # not always can calculate automatic threshold\n",
2084
    "        THRs.append(scrub.threshold_)\n",
2085
    "        print('Threshold found by scrublet')\n",
2086
    "    except:\n",
2087
    "        THRs.append(0.4)\n",
2088
    "        no_thr.append(run)\n",
2089
    "        print('No threshold found, assigning 0.4 to', run)\n",
2090
    "        scrub.call_doublets(threshold=0.4) # so that it can make the plot\n",
2091
    "    fig = scrub.plot_histogram()\n",
2092
    "    fig[0].savefig(run + '.png')\n",
2093
    "    \n",
2094
    "    \n",
2095
    "    scrub.call_doublets(threshold=CUT)\n",
2096
    "    fig = scrub.plot_histogram()\n",
2097
    "    fig[0].savefig(run + '_mad_' + '.png')\n",
2098
    "    plt.close('all')\n",
2099
    "    print()\n",
2100
    "    print()"
2101
   ]
2102
  },
2103
  {
2104
   "cell_type": "code",
2105
   "execution_count": 21,
2106
   "id": "5e3843e1-d357-4dc5-b57e-5ca0bb85b52f",
2107
   "metadata": {
2108
    "tags": []
2109
   },
2110
   "outputs": [],
2111
   "source": [
2112
    "ns = np.array(list(map(len, DSs)))"
2113
   ]
2114
  },
2115
  {
2116
   "cell_type": "code",
2117
   "execution_count": 22,
2118
   "id": "b7209b5f-31ba-4e37-a8cf-be9deadc2a2c",
2119
   "metadata": {
2120
    "tags": []
2121
   },
2122
   "outputs": [],
2123
   "source": [
2124
    "tbl = pd.DataFrame({\n",
2125
    "    'run': np.repeat(RUNs, ns),\n",
2126
    "    'ds': np.concatenate(DSs),\n",
2127
    "    'thr': np.repeat(THRs, ns),\n",
2128
    "    'mad_MED': np.repeat(MEDs, ns),\n",
2129
    "    'mad_MAD': np.repeat(MADs, ns),\n",
2130
    "    'mad_thr': np.repeat(CUTs, ns),\n",
2131
    "    }, index=np.concatenate(CELLs))\n",
2132
    "\n",
2133
    "tbl['auto_prd'] = tbl['ds'] > tbl['thr']\n",
2134
    "tbl['mad_prd'] = tbl['ds'] > tbl['mad_thr']"
2135
   ]
2136
  },
2137
  {
2138
   "cell_type": "code",
2139
   "execution_count": 23,
2140
   "id": "97e37766-7277-459c-818c-70b117f07614",
2141
   "metadata": {
2142
    "tags": []
2143
   },
2144
   "outputs": [],
2145
   "source": [
2146
    "adata.obs['mad_prd']=tbl['mad_prd']\n",
2147
    "adata.obs['ds']=tbl['ds']\n",
2148
    "adata.obs['mad_MED']=tbl['mad_MED']\n",
2149
    "adata.obs['mad_MAD']=tbl['mad_MAD']\n",
2150
    "adata.obs['mad_thr']=tbl['mad_thr']"
2151
   ]
2152
  },
2153
  {
2154
   "cell_type": "code",
2155
   "execution_count": 24,
2156
   "id": "75c30c38-a91e-4e07-b27d-3777248ade23",
2157
   "metadata": {
2158
    "collapsed": true,
2159
    "jupyter": {
2160
     "outputs_hidden": true
2161
    },
2162
    "tags": []
2163
   },
2164
   "outputs": [
2165
    {
2166
     "data": {
2167
      "text/html": [
2168
       "<div>\n",
2169
       "<style scoped>\n",
2170
       "    .dataframe tbody tr th:only-of-type {\n",
2171
       "        vertical-align: middle;\n",
2172
       "    }\n",
2173
       "\n",
2174
       "    .dataframe tbody tr th {\n",
2175
       "        vertical-align: top;\n",
2176
       "    }\n",
2177
       "\n",
2178
       "    .dataframe thead th {\n",
2179
       "        text-align: right;\n",
2180
       "    }\n",
2181
       "</style>\n",
2182
       "<table border=\"1\" class=\"dataframe\">\n",
2183
       "  <thead>\n",
2184
       "    <tr style=\"text-align: right;\">\n",
2185
       "      <th></th>\n",
2186
       "      <th>sample_type</th>\n",
2187
       "      <th>Donor</th>\n",
2188
       "      <th>Sanger_ID</th>\n",
2189
       "      <th>tissue</th>\n",
2190
       "      <th>site</th>\n",
2191
       "      <th>Sex</th>\n",
2192
       "      <th>batch</th>\n",
2193
       "      <th>n_counts</th>\n",
2194
       "      <th>mad_prd</th>\n",
2195
       "      <th>ds</th>\n",
2196
       "      <th>mad_MED</th>\n",
2197
       "      <th>mad_MAD</th>\n",
2198
       "      <th>mad_thr</th>\n",
2199
       "    </tr>\n",
2200
       "  </thead>\n",
2201
       "  <tbody>\n",
2202
       "    <tr>\n",
2203
       "      <th>AAACCTGAGAAGCCCA-0</th>\n",
2204
       "      <td>CTCL</td>\n",
2205
       "      <td>CTCL1</td>\n",
2206
       "      <td>WSSS_SKN8090612</td>\n",
2207
       "      <td>Epidermis</td>\n",
2208
       "      <td>lesion</td>\n",
2209
       "      <td>Female</td>\n",
2210
       "      <td>0</td>\n",
2211
       "      <td>9657.0</td>\n",
2212
       "      <td>False</td>\n",
2213
       "      <td>0.054859</td>\n",
2214
       "      <td>0.074380</td>\n",
2215
       "      <td>0.045523</td>\n",
2216
       "      <td>0.256471</td>\n",
2217
       "    </tr>\n",
2218
       "    <tr>\n",
2219
       "      <th>AAACCTGAGAATGTTG-0</th>\n",
2220
       "      <td>CTCL</td>\n",
2221
       "      <td>CTCL1</td>\n",
2222
       "      <td>WSSS_SKN8090612</td>\n",
2223
       "      <td>Epidermis</td>\n",
2224
       "      <td>lesion</td>\n",
2225
       "      <td>Female</td>\n",
2226
       "      <td>0</td>\n",
2227
       "      <td>7317.0</td>\n",
2228
       "      <td>False</td>\n",
2229
       "      <td>0.105085</td>\n",
2230
       "      <td>0.074380</td>\n",
2231
       "      <td>0.045523</td>\n",
2232
       "      <td>0.256471</td>\n",
2233
       "    </tr>\n",
2234
       "    <tr>\n",
2235
       "      <th>AAACCTGAGCCAACAG-0</th>\n",
2236
       "      <td>CTCL</td>\n",
2237
       "      <td>CTCL1</td>\n",
2238
       "      <td>WSSS_SKN8090612</td>\n",
2239
       "      <td>Epidermis</td>\n",
2240
       "      <td>lesion</td>\n",
2241
       "      <td>Female</td>\n",
2242
       "      <td>0</td>\n",
2243
       "      <td>2564.0</td>\n",
2244
       "      <td>False</td>\n",
2245
       "      <td>0.207339</td>\n",
2246
       "      <td>0.074380</td>\n",
2247
       "      <td>0.045523</td>\n",
2248
       "      <td>0.256471</td>\n",
2249
       "    </tr>\n",
2250
       "    <tr>\n",
2251
       "      <th>AAACCTGAGCGTTCCG-0</th>\n",
2252
       "      <td>CTCL</td>\n",
2253
       "      <td>CTCL1</td>\n",
2254
       "      <td>WSSS_SKN8090612</td>\n",
2255
       "      <td>Epidermis</td>\n",
2256
       "      <td>lesion</td>\n",
2257
       "      <td>Female</td>\n",
2258
       "      <td>0</td>\n",
2259
       "      <td>1501.0</td>\n",
2260
       "      <td>False</td>\n",
2261
       "      <td>0.118727</td>\n",
2262
       "      <td>0.074380</td>\n",
2263
       "      <td>0.045523</td>\n",
2264
       "      <td>0.256471</td>\n",
2265
       "    </tr>\n",
2266
       "    <tr>\n",
2267
       "      <th>AAACCTGAGTACGTTC-0</th>\n",
2268
       "      <td>CTCL</td>\n",
2269
       "      <td>CTCL1</td>\n",
2270
       "      <td>WSSS_SKN8090612</td>\n",
2271
       "      <td>Epidermis</td>\n",
2272
       "      <td>lesion</td>\n",
2273
       "      <td>Female</td>\n",
2274
       "      <td>0</td>\n",
2275
       "      <td>31221.0</td>\n",
2276
       "      <td>False</td>\n",
2277
       "      <td>0.085657</td>\n",
2278
       "      <td>0.074380</td>\n",
2279
       "      <td>0.045523</td>\n",
2280
       "      <td>0.256471</td>\n",
2281
       "    </tr>\n",
2282
       "    <tr>\n",
2283
       "      <th>...</th>\n",
2284
       "      <td>...</td>\n",
2285
       "      <td>...</td>\n",
2286
       "      <td>...</td>\n",
2287
       "      <td>...</td>\n",
2288
       "      <td>...</td>\n",
2289
       "      <td>...</td>\n",
2290
       "      <td>...</td>\n",
2291
       "      <td>...</td>\n",
2292
       "      <td>...</td>\n",
2293
       "      <td>...</td>\n",
2294
       "      <td>...</td>\n",
2295
       "      <td>...</td>\n",
2296
       "      <td>...</td>\n",
2297
       "    </tr>\n",
2298
       "    <tr>\n",
2299
       "      <th>TTTGTCACATGATCCA-39</th>\n",
2300
       "      <td>CTCL</td>\n",
2301
       "      <td>CTCL4</td>\n",
2302
       "      <td>CTCL4_GEX_4</td>\n",
2303
       "      <td>Epidermis</td>\n",
2304
       "      <td>lesion</td>\n",
2305
       "      <td>Male</td>\n",
2306
       "      <td>39</td>\n",
2307
       "      <td>14797.0</td>\n",
2308
       "      <td>False</td>\n",
2309
       "      <td>0.120760</td>\n",
2310
       "      <td>0.079332</td>\n",
2311
       "      <td>0.040760</td>\n",
2312
       "      <td>0.242374</td>\n",
2313
       "    </tr>\n",
2314
       "    <tr>\n",
2315
       "      <th>TTTGTCACATTACCTT-39</th>\n",
2316
       "      <td>CTCL</td>\n",
2317
       "      <td>CTCL4</td>\n",
2318
       "      <td>CTCL4_GEX_4</td>\n",
2319
       "      <td>Epidermis</td>\n",
2320
       "      <td>lesion</td>\n",
2321
       "      <td>Male</td>\n",
2322
       "      <td>39</td>\n",
2323
       "      <td>1168.0</td>\n",
2324
       "      <td>False</td>\n",
2325
       "      <td>0.058511</td>\n",
2326
       "      <td>0.079332</td>\n",
2327
       "      <td>0.040760</td>\n",
2328
       "      <td>0.242374</td>\n",
2329
       "    </tr>\n",
2330
       "    <tr>\n",
2331
       "      <th>TTTGTCAGTCCAGTAT-39</th>\n",
2332
       "      <td>CTCL</td>\n",
2333
       "      <td>CTCL4</td>\n",
2334
       "      <td>CTCL4_GEX_4</td>\n",
2335
       "      <td>Epidermis</td>\n",
2336
       "      <td>lesion</td>\n",
2337
       "      <td>Male</td>\n",
2338
       "      <td>39</td>\n",
2339
       "      <td>5442.0</td>\n",
2340
       "      <td>False</td>\n",
2341
       "      <td>0.102190</td>\n",
2342
       "      <td>0.079332</td>\n",
2343
       "      <td>0.040760</td>\n",
2344
       "      <td>0.242374</td>\n",
2345
       "    </tr>\n",
2346
       "    <tr>\n",
2347
       "      <th>TTTGTCATCACTATTC-39</th>\n",
2348
       "      <td>CTCL</td>\n",
2349
       "      <td>CTCL4</td>\n",
2350
       "      <td>CTCL4_GEX_4</td>\n",
2351
       "      <td>Epidermis</td>\n",
2352
       "      <td>lesion</td>\n",
2353
       "      <td>Male</td>\n",
2354
       "      <td>39</td>\n",
2355
       "      <td>2733.0</td>\n",
2356
       "      <td>False</td>\n",
2357
       "      <td>0.070175</td>\n",
2358
       "      <td>0.079332</td>\n",
2359
       "      <td>0.040760</td>\n",
2360
       "      <td>0.242374</td>\n",
2361
       "    </tr>\n",
2362
       "    <tr>\n",
2363
       "      <th>TTTGTCATCGGTGTTA-39</th>\n",
2364
       "      <td>CTCL</td>\n",
2365
       "      <td>CTCL4</td>\n",
2366
       "      <td>CTCL4_GEX_4</td>\n",
2367
       "      <td>Epidermis</td>\n",
2368
       "      <td>lesion</td>\n",
2369
       "      <td>Male</td>\n",
2370
       "      <td>39</td>\n",
2371
       "      <td>10016.0</td>\n",
2372
       "      <td>False</td>\n",
2373
       "      <td>0.053435</td>\n",
2374
       "      <td>0.079332</td>\n",
2375
       "      <td>0.040760</td>\n",
2376
       "      <td>0.242374</td>\n",
2377
       "    </tr>\n",
2378
       "  </tbody>\n",
2379
       "</table>\n",
2380
       "<p>346056 rows × 13 columns</p>\n",
2381
       "</div>"
2382
      ],
2383
      "text/plain": [
2384
       "                    sample_type  Donor        Sanger_ID     tissue    site  \\\n",
2385
       "AAACCTGAGAAGCCCA-0         CTCL  CTCL1  WSSS_SKN8090612  Epidermis  lesion   \n",
2386
       "AAACCTGAGAATGTTG-0         CTCL  CTCL1  WSSS_SKN8090612  Epidermis  lesion   \n",
2387
       "AAACCTGAGCCAACAG-0         CTCL  CTCL1  WSSS_SKN8090612  Epidermis  lesion   \n",
2388
       "AAACCTGAGCGTTCCG-0         CTCL  CTCL1  WSSS_SKN8090612  Epidermis  lesion   \n",
2389
       "AAACCTGAGTACGTTC-0         CTCL  CTCL1  WSSS_SKN8090612  Epidermis  lesion   \n",
2390
       "...                         ...    ...              ...        ...     ...   \n",
2391
       "TTTGTCACATGATCCA-39        CTCL  CTCL4      CTCL4_GEX_4  Epidermis  lesion   \n",
2392
       "TTTGTCACATTACCTT-39        CTCL  CTCL4      CTCL4_GEX_4  Epidermis  lesion   \n",
2393
       "TTTGTCAGTCCAGTAT-39        CTCL  CTCL4      CTCL4_GEX_4  Epidermis  lesion   \n",
2394
       "TTTGTCATCACTATTC-39        CTCL  CTCL4      CTCL4_GEX_4  Epidermis  lesion   \n",
2395
       "TTTGTCATCGGTGTTA-39        CTCL  CTCL4      CTCL4_GEX_4  Epidermis  lesion   \n",
2396
       "\n",
2397
       "                        Sex batch  n_counts  mad_prd        ds   mad_MED  \\\n",
2398
       "AAACCTGAGAAGCCCA-0   Female     0    9657.0    False  0.054859  0.074380   \n",
2399
       "AAACCTGAGAATGTTG-0   Female     0    7317.0    False  0.105085  0.074380   \n",
2400
       "AAACCTGAGCCAACAG-0   Female     0    2564.0    False  0.207339  0.074380   \n",
2401
       "AAACCTGAGCGTTCCG-0   Female     0    1501.0    False  0.118727  0.074380   \n",
2402
       "AAACCTGAGTACGTTC-0   Female     0   31221.0    False  0.085657  0.074380   \n",
2403
       "...                     ...   ...       ...      ...       ...       ...   \n",
2404
       "TTTGTCACATGATCCA-39    Male    39   14797.0    False  0.120760  0.079332   \n",
2405
       "TTTGTCACATTACCTT-39    Male    39    1168.0    False  0.058511  0.079332   \n",
2406
       "TTTGTCAGTCCAGTAT-39    Male    39    5442.0    False  0.102190  0.079332   \n",
2407
       "TTTGTCATCACTATTC-39    Male    39    2733.0    False  0.070175  0.079332   \n",
2408
       "TTTGTCATCGGTGTTA-39    Male    39   10016.0    False  0.053435  0.079332   \n",
2409
       "\n",
2410
       "                      mad_MAD   mad_thr  \n",
2411
       "AAACCTGAGAAGCCCA-0   0.045523  0.256471  \n",
2412
       "AAACCTGAGAATGTTG-0   0.045523  0.256471  \n",
2413
       "AAACCTGAGCCAACAG-0   0.045523  0.256471  \n",
2414
       "AAACCTGAGCGTTCCG-0   0.045523  0.256471  \n",
2415
       "AAACCTGAGTACGTTC-0   0.045523  0.256471  \n",
2416
       "...                       ...       ...  \n",
2417
       "TTTGTCACATGATCCA-39  0.040760  0.242374  \n",
2418
       "TTTGTCACATTACCTT-39  0.040760  0.242374  \n",
2419
       "TTTGTCAGTCCAGTAT-39  0.040760  0.242374  \n",
2420
       "TTTGTCATCACTATTC-39  0.040760  0.242374  \n",
2421
       "TTTGTCATCGGTGTTA-39  0.040760  0.242374  \n",
2422
       "\n",
2423
       "[346056 rows x 13 columns]"
2424
      ]
2425
     },
2426
     "execution_count": 24,
2427
     "metadata": {},
2428
     "output_type": "execute_result"
2429
    }
2430
   ],
2431
   "source": [
2432
    "adata.obs"
2433
   ]
2434
  },
2435
  {
2436
   "cell_type": "code",
2437
   "execution_count": 25,
2438
   "id": "24e4e5d7-2b52-47d5-ac42-ffec5f300b7d",
2439
   "metadata": {
2440
    "tags": []
2441
   },
2442
   "outputs": [],
2443
   "source": [
2444
    "adata.write_h5ad('/lustre/scratch126/cellgen/team298/ab72/CTCL/ctcl_cellbender_raw_db.h5ad')"
2445
   ]
2446
  },
2447
  {
2448
   "cell_type": "code",
2449
   "execution_count": 26,
2450
   "id": "42af0828-be3c-4ac2-8766-47a493826c17",
2451
   "metadata": {
2452
    "tags": []
2453
   },
2454
   "outputs": [],
2455
   "source": [
2456
    "adata = adata[tbl['mad_prd'] != True].copy()  # keep rows whose mad_prd flag is not True; .copy() makes a real AnnData instead of a view, so later writes to .obs do not raise ImplicitModificationWarning"
2457
   ]
2458
  },
2459
  {
2460
   "cell_type": "code",
2461
   "execution_count": 126,
2462
   "id": "4d455bb1-0d71-4f66-9fa0-eb9d183ecfaa",
2463
   "metadata": {
2464
    "tags": []
2465
   },
2466
   "outputs": [],
2467
   "source": [
2468
    "#adata.obs['mad_prd']=tbl['mad_prd']\n",
2469
    "#adata = adata[tbl['mad_prd'] != True]"
2470
   ]
2471
  },
2472
  {
2473
   "cell_type": "code",
2474
   "execution_count": 27,
2475
   "id": "3ca3b4d2-94be-4322-b071-f44cf9ec125a",
2476
   "metadata": {
2477
    "tags": []
2478
   },
2479
   "outputs": [
2480
    {
2481
     "name": "stdout",
2482
     "output_type": "stream",
2483
     "text": [
2484
      "filtered out 20793 cells that have less than 200 genes expressed\n"
2485
     ]
2486
    },
2487
    {
2488
     "name": "stderr",
2489
     "output_type": "stream",
2490
     "text": [
2491
      "/nfs/team298/ab72/miniconda3/envs/multiome/lib/python3.10/site-packages/scanpy/preprocessing/_simple.py:160: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual.\n",
2492
      "  adata.obs[\"n_genes\"] = number\n"
2493
     ]
2494
    },
2495
    {
2496
     "name": "stdout",
2497
     "output_type": "stream",
2498
     "text": [
2499
      "filtered out 6767 genes that are detected in less than 3 cells\n"
2500
     ]
2501
    }
2502
   ],
2503
   "source": [
2504
    "sc.pp.filter_cells(adata, min_genes=200)  # drop cells with fewer than 200 detected genes\n",
2505
    "sc.pp.filter_genes(adata, min_cells=3)  # drop genes detected in fewer than 3 cells\n",
2506
    "mito_genes = adata.var_names.str.startswith('MT-')  # boolean mask over genes whose name starts with 'MT-'\n",
2507
    "adata.obs['n_counts'] = adata.X.sum(axis=1).A1  # per-cell total over the genes that survived filtering\n",
2508
    "adata.obs['percent_mito'] = adata[:, mito_genes].X.sum(axis=1).A1 / adata.obs['n_counts'].to_numpy()  # mito counts / total counts, stored as a 0-1 fraction"
2509
   ]
2510
  },
2511
  {
2512
   "cell_type": "code",
2513
   "execution_count": 28,
2514
   "id": "f97a2981-b5ee-4820-a3ba-2d0727b6dece",
2515
   "metadata": {
2516
    "tags": []
2517
   },
2518
   "outputs": [],
2519
   "source": [
2520
    "adata.obs_names_make_unique()"
2521
   ]
2522
  },
2523
  {
2524
   "cell_type": "code",
2525
   "execution_count": 31,
2526
   "id": "58b20fa2-e576-42e6-9e1d-491e0ad9aa5f",
2527
   "metadata": {
2528
    "tags": []
2529
   },
2530
   "outputs": [
2531
    {
2532
     "data": {
2533
      "text/html": [
2534
       "<div>\n",
2535
       "<style scoped>\n",
2536
       "    .dataframe tbody tr th:only-of-type {\n",
2537
       "        vertical-align: middle;\n",
2538
       "    }\n",
2539
       "\n",
2540
       "    .dataframe tbody tr th {\n",
2541
       "        vertical-align: top;\n",
2542
       "    }\n",
2543
       "\n",
2544
       "    .dataframe thead th {\n",
2545
       "        text-align: right;\n",
2546
       "    }\n",
2547
       "</style>\n",
2548
       "<table border=\"1\" class=\"dataframe\">\n",
2549
       "  <thead>\n",
2550
       "    <tr style=\"text-align: right;\">\n",
2551
       "      <th></th>\n",
2552
       "      <th>sample_type</th>\n",
2553
       "      <th>Donor</th>\n",
2554
       "      <th>Sanger_ID</th>\n",
2555
       "      <th>tissue</th>\n",
2556
       "      <th>site</th>\n",
2557
       "      <th>Sex</th>\n",
2558
       "      <th>batch</th>\n",
2559
       "      <th>n_counts</th>\n",
2560
       "      <th>mad_prd</th>\n",
2561
       "      <th>ds</th>\n",
2562
       "      <th>mad_MED</th>\n",
2563
       "      <th>mad_MAD</th>\n",
2564
       "      <th>mad_thr</th>\n",
2565
       "      <th>n_genes</th>\n",
2566
       "      <th>percent_mito</th>\n",
2567
       "    </tr>\n",
2568
       "  </thead>\n",
2569
       "  <tbody>\n",
2570
       "    <tr>\n",
2571
       "      <th>AAACCTGAGAAGCCCA-0</th>\n",
2572
       "      <td>CTCL</td>\n",
2573
       "      <td>CTCL1</td>\n",
2574
       "      <td>WSSS_SKN8090612</td>\n",
2575
       "      <td>Epidermis</td>\n",
2576
       "      <td>lesion</td>\n",
2577
       "      <td>Female</td>\n",
2578
       "      <td>0</td>\n",
2579
       "      <td>9657.0</td>\n",
2580
       "      <td>False</td>\n",
2581
       "      <td>0.054859</td>\n",
2582
       "      <td>0.074380</td>\n",
2583
       "      <td>0.045523</td>\n",
2584
       "      <td>0.256471</td>\n",
2585
       "      <td>2766</td>\n",
2586
       "      <td>0.002692</td>\n",
2587
       "    </tr>\n",
2588
       "    <tr>\n",
2589
       "      <th>AAACCTGAGAATGTTG-0</th>\n",
2590
       "      <td>CTCL</td>\n",
2591
       "      <td>CTCL1</td>\n",
2592
       "      <td>WSSS_SKN8090612</td>\n",
2593
       "      <td>Epidermis</td>\n",
2594
       "      <td>lesion</td>\n",
2595
       "      <td>Female</td>\n",
2596
       "      <td>0</td>\n",
2597
       "      <td>7317.0</td>\n",
2598
       "      <td>False</td>\n",
2599
       "      <td>0.105085</td>\n",
2600
       "      <td>0.074380</td>\n",
2601
       "      <td>0.045523</td>\n",
2602
       "      <td>0.256471</td>\n",
2603
       "      <td>3106</td>\n",
2604
       "      <td>0.000547</td>\n",
2605
       "    </tr>\n",
2606
       "    <tr>\n",
2607
       "      <th>AAACCTGAGCCAACAG-0</th>\n",
2608
       "      <td>CTCL</td>\n",
2609
       "      <td>CTCL1</td>\n",
2610
       "      <td>WSSS_SKN8090612</td>\n",
2611
       "      <td>Epidermis</td>\n",
2612
       "      <td>lesion</td>\n",
2613
       "      <td>Female</td>\n",
2614
       "      <td>0</td>\n",
2615
       "      <td>2564.0</td>\n",
2616
       "      <td>False</td>\n",
2617
       "      <td>0.207339</td>\n",
2618
       "      <td>0.074380</td>\n",
2619
       "      <td>0.045523</td>\n",
2620
       "      <td>0.256471</td>\n",
2621
       "      <td>633</td>\n",
2622
       "      <td>0.003510</td>\n",
2623
       "    </tr>\n",
2624
       "    <tr>\n",
2625
       "      <th>AAACCTGAGCGTTCCG-0</th>\n",
2626
       "      <td>CTCL</td>\n",
2627
       "      <td>CTCL1</td>\n",
2628
       "      <td>WSSS_SKN8090612</td>\n",
2629
       "      <td>Epidermis</td>\n",
2630
       "      <td>lesion</td>\n",
2631
       "      <td>Female</td>\n",
2632
       "      <td>0</td>\n",
2633
       "      <td>1501.0</td>\n",
2634
       "      <td>False</td>\n",
2635
       "      <td>0.118727</td>\n",
2636
       "      <td>0.074380</td>\n",
2637
       "      <td>0.045523</td>\n",
2638
       "      <td>0.256471</td>\n",
2639
       "      <td>979</td>\n",
2640
       "      <td>0.039307</td>\n",
2641
       "    </tr>\n",
2642
       "    <tr>\n",
2643
       "      <th>AAACCTGAGTACGTTC-0</th>\n",
2644
       "      <td>CTCL</td>\n",
2645
       "      <td>CTCL1</td>\n",
2646
       "      <td>WSSS_SKN8090612</td>\n",
2647
       "      <td>Epidermis</td>\n",
2648
       "      <td>lesion</td>\n",
2649
       "      <td>Female</td>\n",
2650
       "      <td>0</td>\n",
2651
       "      <td>31221.0</td>\n",
2652
       "      <td>False</td>\n",
2653
       "      <td>0.085657</td>\n",
2654
       "      <td>0.074380</td>\n",
2655
       "      <td>0.045523</td>\n",
2656
       "      <td>0.256471</td>\n",
2657
       "      <td>4356</td>\n",
2658
       "      <td>0.009833</td>\n",
2659
       "    </tr>\n",
2660
       "    <tr>\n",
2661
       "      <th>...</th>\n",
2662
       "      <td>...</td>\n",
2663
       "      <td>...</td>\n",
2664
       "      <td>...</td>\n",
2665
       "      <td>...</td>\n",
2666
       "      <td>...</td>\n",
2667
       "      <td>...</td>\n",
2668
       "      <td>...</td>\n",
2669
       "      <td>...</td>\n",
2670
       "      <td>...</td>\n",
2671
       "      <td>...</td>\n",
2672
       "      <td>...</td>\n",
2673
       "      <td>...</td>\n",
2674
       "      <td>...</td>\n",
2675
       "      <td>...</td>\n",
2676
       "      <td>...</td>\n",
2677
       "    </tr>\n",
2678
       "    <tr>\n",
2679
       "      <th>TTTGTCATCAACGCTA-26</th>\n",
2680
       "      <td>CTCL</td>\n",
2681
       "      <td>CTCL8</td>\n",
2682
       "      <td>WSSS_SKN10827912</td>\n",
2683
       "      <td>Epidermis</td>\n",
2684
       "      <td>lesion</td>\n",
2685
       "      <td>Male</td>\n",
2686
       "      <td>26</td>\n",
2687
       "      <td>3668.0</td>\n",
2688
       "      <td>False</td>\n",
2689
       "      <td>0.068616</td>\n",
2690
       "      <td>0.106569</td>\n",
2691
       "      <td>0.056269</td>\n",
2692
       "      <td>0.331646</td>\n",
2693
       "      <td>960</td>\n",
2694
       "      <td>0.001636</td>\n",
2695
       "    </tr>\n",
2696
       "    <tr>\n",
2697
       "      <th>TTTGTCATCACGCGGT-26</th>\n",
2698
       "      <td>CTCL</td>\n",
2699
       "      <td>CTCL8</td>\n",
2700
       "      <td>WSSS_SKN10827912</td>\n",
2701
       "      <td>Epidermis</td>\n",
2702
       "      <td>lesion</td>\n",
2703
       "      <td>Male</td>\n",
2704
       "      <td>26</td>\n",
2705
       "      <td>4474.0</td>\n",
2706
       "      <td>False</td>\n",
2707
       "      <td>0.110778</td>\n",
2708
       "      <td>0.106569</td>\n",
2709
       "      <td>0.056269</td>\n",
2710
       "      <td>0.331646</td>\n",
2711
       "      <td>697</td>\n",
2712
       "      <td>0.001341</td>\n",
2713
       "    </tr>\n",
2714
       "    <tr>\n",
2715
       "      <th>TTTGTCATCCAGATCA-26</th>\n",
2716
       "      <td>CTCL</td>\n",
2717
       "      <td>CTCL8</td>\n",
2718
       "      <td>WSSS_SKN10827912</td>\n",
2719
       "      <td>Epidermis</td>\n",
2720
       "      <td>lesion</td>\n",
2721
       "      <td>Male</td>\n",
2722
       "      <td>26</td>\n",
2723
       "      <td>4143.0</td>\n",
2724
       "      <td>False</td>\n",
2725
       "      <td>0.068616</td>\n",
2726
       "      <td>0.106569</td>\n",
2727
       "      <td>0.056269</td>\n",
2728
       "      <td>0.331646</td>\n",
2729
       "      <td>1235</td>\n",
2730
       "      <td>0.000724</td>\n",
2731
       "    </tr>\n",
2732
       "    <tr>\n",
2733
       "      <th>TTTGTCATCGGTCTAA-26</th>\n",
2734
       "      <td>CTCL</td>\n",
2735
       "      <td>CTCL8</td>\n",
2736
       "      <td>WSSS_SKN10827912</td>\n",
2737
       "      <td>Epidermis</td>\n",
2738
       "      <td>lesion</td>\n",
2739
       "      <td>Male</td>\n",
2740
       "      <td>26</td>\n",
2741
       "      <td>2593.0</td>\n",
2742
       "      <td>False</td>\n",
2743
       "      <td>0.124797</td>\n",
2744
       "      <td>0.106569</td>\n",
2745
       "      <td>0.056269</td>\n",
2746
       "      <td>0.331646</td>\n",
2747
       "      <td>874</td>\n",
2748
       "      <td>0.003471</td>\n",
2749
       "    </tr>\n",
2750
       "    <tr>\n",
2751
       "      <th>TTTGTCATCTTGTATC-26</th>\n",
2752
       "      <td>CTCL</td>\n",
2753
       "      <td>CTCL8</td>\n",
2754
       "      <td>WSSS_SKN10827912</td>\n",
2755
       "      <td>Epidermis</td>\n",
2756
       "      <td>lesion</td>\n",
2757
       "      <td>Male</td>\n",
2758
       "      <td>26</td>\n",
2759
       "      <td>4724.0</td>\n",
2760
       "      <td>False</td>\n",
2761
       "      <td>0.082090</td>\n",
2762
       "      <td>0.106569</td>\n",
2763
       "      <td>0.056269</td>\n",
2764
       "      <td>0.331646</td>\n",
2765
       "      <td>1061</td>\n",
2766
       "      <td>0.001905</td>\n",
2767
       "    </tr>\n",
2768
       "  </tbody>\n",
2769
       "</table>\n",
2770
       "<p>242723 rows × 15 columns</p>\n",
2771
       "</div>"
2772
      ],
2773
      "text/plain": [
2774
       "                    sample_type  Donor         Sanger_ID     tissue    site  \\\n",
2775
       "AAACCTGAGAAGCCCA-0         CTCL  CTCL1   WSSS_SKN8090612  Epidermis  lesion   \n",
2776
       "AAACCTGAGAATGTTG-0         CTCL  CTCL1   WSSS_SKN8090612  Epidermis  lesion   \n",
2777
       "AAACCTGAGCCAACAG-0         CTCL  CTCL1   WSSS_SKN8090612  Epidermis  lesion   \n",
2778
       "AAACCTGAGCGTTCCG-0         CTCL  CTCL1   WSSS_SKN8090612  Epidermis  lesion   \n",
2779
       "AAACCTGAGTACGTTC-0         CTCL  CTCL1   WSSS_SKN8090612  Epidermis  lesion   \n",
2780
       "...                         ...    ...               ...        ...     ...   \n",
2781
       "TTTGTCATCAACGCTA-26        CTCL  CTCL8  WSSS_SKN10827912  Epidermis  lesion   \n",
2782
       "TTTGTCATCACGCGGT-26        CTCL  CTCL8  WSSS_SKN10827912  Epidermis  lesion   \n",
2783
       "TTTGTCATCCAGATCA-26        CTCL  CTCL8  WSSS_SKN10827912  Epidermis  lesion   \n",
2784
       "TTTGTCATCGGTCTAA-26        CTCL  CTCL8  WSSS_SKN10827912  Epidermis  lesion   \n",
2785
       "TTTGTCATCTTGTATC-26        CTCL  CTCL8  WSSS_SKN10827912  Epidermis  lesion   \n",
2786
       "\n",
2787
       "                        Sex batch  n_counts  mad_prd        ds   mad_MED  \\\n",
2788
       "AAACCTGAGAAGCCCA-0   Female     0    9657.0    False  0.054859  0.074380   \n",
2789
       "AAACCTGAGAATGTTG-0   Female     0    7317.0    False  0.105085  0.074380   \n",
2790
       "AAACCTGAGCCAACAG-0   Female     0    2564.0    False  0.207339  0.074380   \n",
2791
       "AAACCTGAGCGTTCCG-0   Female     0    1501.0    False  0.118727  0.074380   \n",
2792
       "AAACCTGAGTACGTTC-0   Female     0   31221.0    False  0.085657  0.074380   \n",
2793
       "...                     ...   ...       ...      ...       ...       ...   \n",
2794
       "TTTGTCATCAACGCTA-26    Male    26    3668.0    False  0.068616  0.106569   \n",
2795
       "TTTGTCATCACGCGGT-26    Male    26    4474.0    False  0.110778  0.106569   \n",
2796
       "TTTGTCATCCAGATCA-26    Male    26    4143.0    False  0.068616  0.106569   \n",
2797
       "TTTGTCATCGGTCTAA-26    Male    26    2593.0    False  0.124797  0.106569   \n",
2798
       "TTTGTCATCTTGTATC-26    Male    26    4724.0    False  0.082090  0.106569   \n",
2799
       "\n",
2800
       "                      mad_MAD   mad_thr  n_genes  percent_mito  \n",
2801
       "AAACCTGAGAAGCCCA-0   0.045523  0.256471     2766      0.002692  \n",
2802
       "AAACCTGAGAATGTTG-0   0.045523  0.256471     3106      0.000547  \n",
2803
       "AAACCTGAGCCAACAG-0   0.045523  0.256471      633      0.003510  \n",
2804
       "AAACCTGAGCGTTCCG-0   0.045523  0.256471      979      0.039307  \n",
2805
       "AAACCTGAGTACGTTC-0   0.045523  0.256471     4356      0.009833  \n",
2806
       "...                       ...       ...      ...           ...  \n",
2807
       "TTTGTCATCAACGCTA-26  0.056269  0.331646      960      0.001636  \n",
2808
       "TTTGTCATCACGCGGT-26  0.056269  0.331646      697      0.001341  \n",
2809
       "TTTGTCATCCAGATCA-26  0.056269  0.331646     1235      0.000724  \n",
2810
       "TTTGTCATCGGTCTAA-26  0.056269  0.331646      874      0.003471  \n",
2811
       "TTTGTCATCTTGTATC-26  0.056269  0.331646     1061      0.001905  \n",
2812
       "\n",
2813
       "[242723 rows x 15 columns]"
2814
      ]
2815
     },
2816
     "execution_count": 31,
2817
     "metadata": {},
2818
     "output_type": "execute_result"
2819
    }
2820
   ],
2821
   "source": [
2822
    "adata.obs"
2823
   ]
2824
  },
2825
  {
2826
   "cell_type": "code",
2827
   "execution_count": 29,
2828
   "id": "cecf36fa-a540-4e75-a2e7-fc523dc9deb4",
2829
   "metadata": {
2830
    "tags": []
2831
   },
2832
   "outputs": [
2833
    {
2834
     "data": {
2835
      "image/png": "",
2836
      "text/plain": [
2837
       "<Figure size 1511.11x500 with 3 Axes>"
2838
      ]
2839
     },
2840
     "metadata": {},
2841
     "output_type": "display_data"
2842
    }
2843
   ],
2844
   "source": [
2845
    "sc.pl.violin(adata, ['n_genes', 'n_counts', 'percent_mito'],\n",
2846
    "             jitter=0.4, multi_panel=True)"
2847
   ]
2848
  },
2849
  {
2850
   "cell_type": "code",
2851
   "execution_count": 10,
2852
   "id": "1a470fa6-5ff8-41ff-a6d8-b6d63381e556",
2853
   "metadata": {
2854
    "tags": []
2855
   },
2856
   "outputs": [],
2857
   "source": [
2858
    "#adata.write_h5ad('/lustre/scratch126/cellgen/team298/ab72/CTCL/ctcl_cellbender_raw_dbrmv_9_5.h5ad')"
2859
   ]
2860
  },
2861
  {
2862
   "cell_type": "code",
2863
   "execution_count": 28,
2864
   "id": "3bedb10e-e575-4184-aee1-98cbdee0cce2",
2865
   "metadata": {
2866
    "tags": []
2867
   },
2868
   "outputs": [
2869
    {
2870
     "data": {
2871
      "text/plain": [
2872
       "AnnData object with n_obs × n_vars = 312708 × 29789\n",
2873
       "    obs: 'sample_type', 'Donor', 'Sanger_ID', 'batch', 'n_counts', 'donor_lane', 'mad_prd', 'ds', 'mad_MED', 'mad_MAD', 'mad_thr', 'n_genes', 'percent_mito'\n",
2874
       "    var: 'gene_ids', 'feature_types', 'n_cells'"
2875
      ]
2876
     },
2877
     "execution_count": 28,
2878
     "metadata": {},
2879
     "output_type": "execute_result"
2880
    }
2881
   ],
2882
   "source": [
2883
    "adata"
2884
   ]
2885
  },
2886
  {
2887
   "cell_type": "code",
2888
   "execution_count": 28,
2889
   "id": "6c7ce50e-b9b1-4ef6-af82-924e80bbba8e",
2890
   "metadata": {
2891
    "tags": []
2892
   },
2893
   "outputs": [],
2894
   "source": [
2895
    "qc_obs = adata.obs  # per-cell QC metrics that every cut-off below reads\n",
2896
    "keep_genes = (qc_obs['n_genes'] < 6000) & (qc_obs['n_genes'] > 400)  # 400 < n_genes < 6000\n",
2897
    "keep_counts_mito = (qc_obs['n_counts'] > 1000) & (qc_obs['percent_mito'] < 0.2)  # n_counts > 1000 and mito fraction < 0.2\n",
2898
    "adata = adata[keep_genes & keep_counts_mito, :]  # single subset applying all four cut-offs at once"
2899
   ]
2900
  },
2901
  {
2902
   "cell_type": "code",
2903
   "execution_count": 29,
2904
   "id": "bd642ab8-f77e-4474-a977-ced7235c2b54",
2905
   "metadata": {
2906
    "tags": []
2907
   },
2908
   "outputs": [],
2909
   "source": [
2910
    "adata.write_h5ad('/lustre/scratch126/cellgen/team298/ab72/CTCL/ctcl_cellbender_raw_dbrmv_QCfiltered_17_5.h5ad')"
2911
   ]
2912
  }
2913
 ],
2914
 "metadata": {
2915
  "kernelspec": {
2916
   "display_name": "“multiome”",
2917
   "language": "python",
2918
   "name": "multiome"
2919
  },
2920
  "language_info": {
2921
   "codemirror_mode": {
2922
    "name": "ipython",
2923
    "version": 3
2924
   },
2925
   "file_extension": ".py",
2926
   "mimetype": "text/x-python",
2927
   "name": "python",
2928
   "nbconvert_exporter": "python",
2929
   "pygments_lexer": "ipython3",
2930
   "version": "3.10.4"
2931
  }
2932
 },
2933
 "nbformat": 4,
2934
 "nbformat_minor": 5
2935
}