--- a +++ b/code_final/ExpressionSet_perparation.ipynb @@ -0,0 +1,1274 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "single-replacement", + "metadata": {}, + "source": [ + "### bulk ExpressionSet" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "abroad-circle", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading required package: GenomicRanges\n", + "\n", + "Loading required package: stats4\n", + "\n", + "Loading required package: BiocGenerics\n", + "\n", + "Loading required package: parallel\n", + "\n", + "\n", + "Attaching package: ‘BiocGenerics’\n", + "\n", + "\n", + "The following objects are masked from ‘package:parallel’:\n", + "\n", + " clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,\n", + " clusterExport, clusterMap, parApply, parCapply, parLapply,\n", + " parLapplyLB, parRapply, parSapply, parSapplyLB\n", + "\n", + "\n", + "The following objects are masked from ‘package:stats’:\n", + "\n", + " IQR, mad, sd, var, xtabs\n", + "\n", + "\n", + "The following objects are masked from ‘package:base’:\n", + "\n", + " anyDuplicated, append, as.data.frame, basename, cbind, colnames,\n", + " dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,\n", + " grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,\n", + " order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,\n", + " rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,\n", + " union, unique, unsplit, which, which.max, which.min\n", + "\n", + "\n", + "Loading required package: S4Vectors\n", + "\n", + "\n", + "Attaching package: ‘S4Vectors’\n", + "\n", + "\n", + "The following object is masked from ‘package:base’:\n", + "\n", + " expand.grid\n", + "\n", + "\n", + "Loading required package: IRanges\n", + "\n", + "Loading required package: GenomeInfoDb\n", + "\n", + "Loading required package: Biobase\n", + "\n", + "Welcome to Bioconductor\n", + "\n", + " Vignettes contain 
introductory material; view with\n", + " 'browseVignettes()'. To cite Bioconductor, see\n", + " 'citation(\"Biobase\")', and for packages 'citation(\"pkgname\")'.\n", + "\n", + "\n", + "Loading required package: DelayedArray\n", + "\n", + "Loading required package: matrixStats\n", + "\n", + "\n", + "Attaching package: ‘matrixStats’\n", + "\n", + "\n", + "The following objects are masked from ‘package:Biobase’:\n", + "\n", + " anyMissing, rowMedians\n", + "\n", + "\n", + "\n", + "Attaching package: ‘DelayedArray’\n", + "\n", + "\n", + "The following objects are masked from ‘package:matrixStats’:\n", + "\n", + " colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges\n", + "\n", + "\n", + "The following objects are masked from ‘package:base’:\n", + "\n", + " aperm, apply, rowsum\n", + "\n", + "\n" + ] + } + ], + "source": [ + "library(SummarizedExperiment)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "experimental-disability", + "metadata": {}, + "outputs": [], + "source": [ + "Matrix <- read.table(\"/lustre/scratch117/cellgen/team205/rl20/CTCL/deconvolution/GSE121212_readcount_rmdup.txt\", \n", + " check.names=F) ### count matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "graduate-mining", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<table class=\"dataframe\">\n", + "<caption>A data.frame: 6 × 147</caption>\n", + "<thead>\n", + "\t<tr><th></th><th scope=col>AD_004_lesional</th><th scope=col>AD_004_non-lesional</th><th scope=col>AD_005_lesional</th><th scope=col>AD_005_non-lesional</th><th scope=col>AD_006_lesional</th><th scope=col>AD_006_non-lesional</th><th scope=col>AD_007_lesional</th><th scope=col>AD_007_non-lesional</th><th scope=col>AD_009_lesional</th><th scope=col>AD_009_non-lesional</th><th scope=col>⋯</th><th scope=col>AD_033_chronic_lesion</th><th scope=col>AD_034_non-lesional</th><th scope=col>AD_034_chronic_lesion</th><th scope=col>AD_032_chronic_lesion</th><th 
scope=col>AD_035_non-lesional</th><th scope=col>AD_035_chronic_lesion</th><th scope=col>AD_036_non-lesional</th><th scope=col>AD_036_chronic_lesion</th><th scope=col>AD_037_non-lesional</th><th scope=col>AD_037_chronic_lesion</th></tr>\n", + "\t<tr><th></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col>⋯</th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th></tr>\n", + "</thead>\n", + "<tbody>\n", + "\t<tr><th scope=row>5S_rRNA</th><td> 0</td><td> 3</td><td> 9</td><td> 12</td><td> 2</td><td> 3</td><td> 6</td><td> 4</td><td> 2</td><td> 5</td><td>⋯</td><td> 1</td><td> 2</td><td> 3</td><td> 3</td><td> 3</td><td> 4</td><td> 3</td><td> 5</td><td> 6</td><td> 2</td></tr>\n", + "\t<tr><th scope=row>7SK</th><td> 9</td><td> 6</td><td> 25</td><td> 25</td><td> 7</td><td> 8</td><td> 9</td><td> 8</td><td> 6</td><td> 15</td><td>⋯</td><td> 3</td><td> 8</td><td> 2</td><td> 5</td><td> 12</td><td> 5</td><td> 7</td><td> 5</td><td> 8</td><td> 3</td></tr>\n", + "\t<tr><th scope=row>A1BG</th><td> 2</td><td> 0</td><td> 4</td><td> 3</td><td> 1</td><td> 1</td><td> 0</td><td> 0</td><td> 4</td><td> 0</td><td>⋯</td><td> 1</td><td> 1</td><td> 2</td><td> 0</td><td> 1</td><td> 2</td><td> 0</td><td> 0</td><td> 1</td><td> 1</td></tr>\n", + "\t<tr><th scope=row>A1BG-AS1</th><td> 5</td><td> 0</td><td> 11</td><td> 8</td><td> 7</td><td> 8</td><td> 13</td><td> 2</td><td> 6</td><td> 19</td><td>⋯</td><td> 4</td><td> 0</td><td> 7</td><td> 7</td><td> 4</td><td> 4</td><td> 10</td><td> 13</td><td> 8</td><td> 4</td></tr>\n", + "\t<tr><th scope=row>A1CF</th><td> 2</td><td> 3</td><td> 2</td><td> 8</td><td> 1</td><td> 
2</td><td> 0</td><td> 5</td><td> 1</td><td> 5</td><td>⋯</td><td> 3</td><td> 0</td><td> 1</td><td> 0</td><td> 0</td><td> 2</td><td> 0</td><td> 0</td><td> 1</td><td> 0</td></tr>\n", + "\t<tr><th scope=row>A2M</th><td>308</td><td>353</td><td>2544</td><td>1366</td><td>978</td><td>406</td><td>2761</td><td>928</td><td>2796</td><td>1891</td><td>⋯</td><td>1463</td><td>1391</td><td>1714</td><td>1998</td><td>683</td><td>1154</td><td>2361</td><td>2374</td><td>1040</td><td>2755</td></tr>\n", + "</tbody>\n", + "</table>\n" + ], + "text/latex": [ + "A data.frame: 6 × 147\n", + "\\begin{tabular}{r|lllllllllllllllllllll}\n", + " & AD\\_004\\_lesional & AD\\_004\\_non-lesional & AD\\_005\\_lesional & AD\\_005\\_non-lesional & AD\\_006\\_lesional & AD\\_006\\_non-lesional & AD\\_007\\_lesional & AD\\_007\\_non-lesional & AD\\_009\\_lesional & AD\\_009\\_non-lesional & ⋯ & AD\\_033\\_chronic\\_lesion & AD\\_034\\_non-lesional & AD\\_034\\_chronic\\_lesion & AD\\_032\\_chronic\\_lesion & AD\\_035\\_non-lesional & AD\\_035\\_chronic\\_lesion & AD\\_036\\_non-lesional & AD\\_036\\_chronic\\_lesion & AD\\_037\\_non-lesional & AD\\_037\\_chronic\\_lesion\\\\\n", + " & <int> & <int> & <int> & <int> & <int> & <int> & <int> & <int> & <int> & <int> & ⋯ & <int> & <int> & <int> & <int> & <int> & <int> & <int> & <int> & <int> & <int>\\\\\n", + "\\hline\n", + "\t5S\\_rRNA & 0 & 3 & 9 & 12 & 2 & 3 & 6 & 4 & 2 & 5 & ⋯ & 1 & 2 & 3 & 3 & 3 & 4 & 3 & 5 & 6 & 2\\\\\n", + "\t7SK & 9 & 6 & 25 & 25 & 7 & 8 & 9 & 8 & 6 & 15 & ⋯ & 3 & 8 & 2 & 5 & 12 & 5 & 7 & 5 & 8 & 3\\\\\n", + "\tA1BG & 2 & 0 & 4 & 3 & 1 & 1 & 0 & 0 & 4 & 0 & ⋯ & 1 & 1 & 2 & 0 & 1 & 2 & 0 & 0 & 1 & 1\\\\\n", + "\tA1BG-AS1 & 5 & 0 & 11 & 8 & 7 & 8 & 13 & 2 & 6 & 19 & ⋯ & 4 & 0 & 7 & 7 & 4 & 4 & 10 & 13 & 8 & 4\\\\\n", + "\tA1CF & 2 & 3 & 2 & 8 & 1 & 2 & 0 & 5 & 1 & 5 & ⋯ & 3 & 0 & 1 & 0 & 0 & 2 & 0 & 0 & 1 & 0\\\\\n", + "\tA2M & 308 & 353 & 2544 & 1366 & 978 & 406 & 2761 & 928 & 2796 & 1891 & ⋯ & 1463 & 1391 & 1714 & 1998 & 683 & 1154 & 
2361 & 2374 & 1040 & 2755\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A data.frame: 6 × 147\n", + "\n", + "| <!--/--> | AD_004_lesional <int> | AD_004_non-lesional <int> | AD_005_lesional <int> | AD_005_non-lesional <int> | AD_006_lesional <int> | AD_006_non-lesional <int> | AD_007_lesional <int> | AD_007_non-lesional <int> | AD_009_lesional <int> | AD_009_non-lesional <int> | ⋯ ⋯ | AD_033_chronic_lesion <int> | AD_034_non-lesional <int> | AD_034_chronic_lesion <int> | AD_032_chronic_lesion <int> | AD_035_non-lesional <int> | AD_035_chronic_lesion <int> | AD_036_non-lesional <int> | AD_036_chronic_lesion <int> | AD_037_non-lesional <int> | AD_037_chronic_lesion <int> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| 5S_rRNA | 0 | 3 | 9 | 12 | 2 | 3 | 6 | 4 | 2 | 5 | ⋯ | 1 | 2 | 3 | 3 | 3 | 4 | 3 | 5 | 6 | 2 |\n", + "| 7SK | 9 | 6 | 25 | 25 | 7 | 8 | 9 | 8 | 6 | 15 | ⋯ | 3 | 8 | 2 | 5 | 12 | 5 | 7 | 5 | 8 | 3 |\n", + "| A1BG | 2 | 0 | 4 | 3 | 1 | 1 | 0 | 0 | 4 | 0 | ⋯ | 1 | 1 | 2 | 0 | 1 | 2 | 0 | 0 | 1 | 1 |\n", + "| A1BG-AS1 | 5 | 0 | 11 | 8 | 7 | 8 | 13 | 2 | 6 | 19 | ⋯ | 4 | 0 | 7 | 7 | 4 | 4 | 10 | 13 | 8 | 4 |\n", + "| A1CF | 2 | 3 | 2 | 8 | 1 | 2 | 0 | 5 | 1 | 5 | ⋯ | 3 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 1 | 0 |\n", + "| A2M | 308 | 353 | 2544 | 1366 | 978 | 406 | 2761 | 928 | 2796 | 1891 | ⋯ | 1463 | 1391 | 1714 | 1998 | 683 | 1154 | 2361 | 2374 | 1040 | 2755 |\n", + "\n" + ], + "text/plain": [ + " AD_004_lesional AD_004_non-lesional AD_005_lesional\n", + "5S_rRNA 0 3 9 \n", + "7SK 9 6 25 \n", + "A1BG 2 0 4 \n", + "A1BG-AS1 5 0 11 \n", + "A1CF 2 3 2 \n", + "A2M 308 353 2544 \n", + " AD_005_non-lesional AD_006_lesional AD_006_non-lesional\n", + "5S_rRNA 12 2 3 \n", + "7SK 25 7 8 \n", + "A1BG 3 1 1 \n", + "A1BG-AS1 8 7 8 \n", + "A1CF 8 1 2 \n", + "A2M 1366 978 406 \n", + " AD_007_lesional AD_007_non-lesional AD_009_lesional\n", + "5S_rRNA 6 4 2 \n", + "7SK 9 8 6 \n", + "A1BG 0 0 4 \n", 
+ "A1BG-AS1 13 2 6 \n", + "A1CF 0 5 1 \n", + "A2M 2761 928 2796 \n", + " AD_009_non-lesional ⋯ AD_033_chronic_lesion AD_034_non-lesional\n", + "5S_rRNA 5 ⋯ 1 2 \n", + "7SK 15 ⋯ 3 8 \n", + "A1BG 0 ⋯ 1 1 \n", + "A1BG-AS1 19 ⋯ 4 0 \n", + "A1CF 5 ⋯ 3 0 \n", + "A2M 1891 ⋯ 1463 1391 \n", + " AD_034_chronic_lesion AD_032_chronic_lesion AD_035_non-lesional\n", + "5S_rRNA 3 3 3 \n", + "7SK 2 5 12 \n", + "A1BG 2 0 1 \n", + "A1BG-AS1 7 7 4 \n", + "A1CF 1 0 0 \n", + "A2M 1714 1998 683 \n", + " AD_035_chronic_lesion AD_036_non-lesional AD_036_chronic_lesion\n", + "5S_rRNA 4 3 5 \n", + "7SK 5 7 5 \n", + "A1BG 2 0 0 \n", + "A1BG-AS1 4 10 13 \n", + "A1CF 2 0 0 \n", + "A2M 1154 2361 2374 \n", + " AD_037_non-lesional AD_037_chronic_lesion\n", + "5S_rRNA 6 2 \n", + "7SK 8 3 \n", + "A1BG 1 1 \n", + "A1BG-AS1 8 4 \n", + "A1CF 1 0 \n", + "A2M 1040 2755 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "<style>\n", + ".list-inline {list-style: none; margin:0; padding: 0}\n", + ".list-inline>li {display: inline-block}\n", + ".list-inline>li:not(:last-child)::after {content: \"\\00b7\"; padding: 0 .5ex}\n", + "</style>\n", + "<ol class=list-inline><li>31362</li><li>147</li></ol>\n" + ], + "text/latex": [ + "\\begin{enumerate*}\n", + "\\item 31362\n", + "\\item 147\n", + "\\end{enumerate*}\n" + ], + "text/markdown": [ + "1. 31362\n", + "2. 
147\n", + "\n", + "\n" + ], + "text/plain": [ + "[1] 31362 147" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "head(Matrix);dim(Matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "jewish-sessions", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<style>\n", + ".list-inline {list-style: none; margin:0; padding: 0}\n", + ".list-inline>li {display: inline-block}\n", + ".list-inline>li:not(:last-child)::after {content: \"\\00b7\"; padding: 0 .5ex}\n", + "</style>\n", + "<ol class=list-inline><li>31362</li><li>147</li></ol>\n" + ], + "text/latex": [ + "\\begin{enumerate*}\n", + "\\item 31362\n", + "\\item 147\n", + "\\end{enumerate*}\n" + ], + "text/markdown": [ + "1. 31362\n", + "2. 147\n", + "\n", + "\n" + ], + "text/plain": [ + "[1] 31362 147" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "is_genename_uniq <- !duplicated(rownames(Matrix))\n", + "Matrix <- Matrix[is_genename_uniq, ]\n", + "dim(Matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "professional-pattern", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [], + "text/latex": [], + "text/markdown": [], + "text/plain": [ + "named integer(0)" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Per-sample library size; samples with zero total counts are listed in `del`.\n", + "# colSums() accumulates in double precision, whereas apply(Matrix, 2, sum) sums the\n", + "# integer columns as integers and yields NA (with only a warning) on overflow,\n", + "# which the `== 0` test below would then silently skip.\n", + "tt <- colSums(Matrix)\n", + "del <- which(tt == 0)\n", + "del" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "excess-salvation", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<table class=\"dataframe\">\n", + "<caption>A data.frame: 6 × 1</caption>\n", + "<thead>\n", + "\t<tr><th></th><th scope=col>gene_name</th></tr>\n", + "\t<tr><th></th><th scope=col><chr></th></tr>\n", + "</thead>\n", + "<tbody>\n", + "\t<tr><th scope=row>5S_rRNA</th><td>5S_rRNA </td></tr>\n", + "\t<tr><th scope=row>7SK</th><td>7SK </td></tr>\n", + "\t<tr><th scope=row>A1BG</th><td>A1BG </td></tr>\n", + 
"\t<tr><th scope=row>A1BG-AS1</th><td>A1BG-AS1</td></tr>\n", + "\t<tr><th scope=row>A1CF</th><td>A1CF </td></tr>\n", + "\t<tr><th scope=row>A2M</th><td>A2M </td></tr>\n", + "</tbody>\n", + "</table>\n" + ], + "text/latex": [ + "A data.frame: 6 × 1\n", + "\\begin{tabular}{r|l}\n", + " & gene\\_name\\\\\n", + " & <chr>\\\\\n", + "\\hline\n", + "\t5S\\_rRNA & 5S\\_rRNA \\\\\n", + "\t7SK & 7SK \\\\\n", + "\tA1BG & A1BG \\\\\n", + "\tA1BG-AS1 & A1BG-AS1\\\\\n", + "\tA1CF & A1CF \\\\\n", + "\tA2M & A2M \\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A data.frame: 6 × 1\n", + "\n", + "| <!--/--> | gene_name <chr> |\n", + "|---|---|\n", + "| 5S_rRNA | 5S_rRNA |\n", + "| 7SK | 7SK |\n", + "| A1BG | A1BG |\n", + "| A1BG-AS1 | A1BG-AS1 |\n", + "| A1CF | A1CF |\n", + "| A2M | A2M |\n", + "\n" + ], + "text/plain": [ + " gene_name\n", + "5S_rRNA 5S_rRNA \n", + "7SK 7SK \n", + "A1BG A1BG \n", + "A1BG-AS1 A1BG-AS1 \n", + "A1CF A1CF \n", + "A2M A2M " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fdat_df <- data.frame(gene_name=rownames(Matrix), stringsAsFactors=F)\n", + "rownames(fdat_df) <- rownames(Matrix)\n", + "metadata_fdat_df <- data.frame(labelDescription= c(\"gene name\"), row.names=c(\"gene_name\"))\n", + "head(fdat_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "collaborative-inspiration", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<table class=\"dataframe\">\n", + "<caption>A data.frame: 6 × 2</caption>\n", + "<thead>\n", + "\t<tr><th></th><th scope=col>sample_type</th><th scope=col>condition</th></tr>\n", + "\t<tr><th></th><th scope=col><chr></th><th scope=col><chr></th></tr>\n", + "</thead>\n", + "<tbody>\n", + "\t<tr><th scope=row>AD_004_lesional</th><td>AD_L </td><td>AD</td></tr>\n", + "\t<tr><th scope=row>AD_004_non-lesional</th><td>AD_NL</td><td>AD</td></tr>\n", + "\t<tr><th scope=row>AD_005_lesional</th><td>AD_L </td><td>AD</td></tr>\n", + "\t<tr><th 
scope=row>AD_005_non-lesional</th><td>AD_NL</td><td>AD</td></tr>\n", + "\t<tr><th scope=row>AD_006_lesional</th><td>AD_L </td><td>AD</td></tr>\n", + "\t<tr><th scope=row>AD_006_non-lesional</th><td>AD_NL</td><td>AD</td></tr>\n", + "</tbody>\n", + "</table>\n" + ], + "text/latex": [ + "A data.frame: 6 × 2\n", + "\\begin{tabular}{r|ll}\n", + " & sample\\_type & condition\\\\\n", + " & <chr> & <chr>\\\\\n", + "\\hline\n", + "\tAD\\_004\\_lesional & AD\\_L & AD\\\\\n", + "\tAD\\_004\\_non-lesional & AD\\_NL & AD\\\\\n", + "\tAD\\_005\\_lesional & AD\\_L & AD\\\\\n", + "\tAD\\_005\\_non-lesional & AD\\_NL & AD\\\\\n", + "\tAD\\_006\\_lesional & AD\\_L & AD\\\\\n", + "\tAD\\_006\\_non-lesional & AD\\_NL & AD\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A data.frame: 6 × 2\n", + "\n", + "| <!--/--> | sample_type <chr> | condition <chr> |\n", + "|---|---|---|\n", + "| AD_004_lesional | AD_L | AD |\n", + "| AD_004_non-lesional | AD_NL | AD |\n", + "| AD_005_lesional | AD_L | AD |\n", + "| AD_005_non-lesional | AD_NL | AD |\n", + "| AD_006_lesional | AD_L | AD |\n", + "| AD_006_non-lesional | AD_NL | AD |\n", + "\n" + ], + "text/plain": [ + " sample_type condition\n", + "AD_004_lesional AD_L AD \n", + "AD_004_non-lesional AD_NL AD \n", + "AD_005_lesional AD_L AD \n", + "AD_005_non-lesional AD_NL AD \n", + "AD_006_lesional AD_L AD \n", + "AD_006_non-lesional AD_NL AD " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "metadata <- read.table(\"/lustre/scratch117/cellgen/team205/rl20/CTCL/deconvolution/GSE121212_meta.xls\", \n", + " check.names=F, sep=\"\\t\", header = T)\n", + "head(metadata)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "declared-cookbook", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<table class=\"dataframe\">\n", + "<caption>A data.frame: 6 × 2</caption>\n", + "<thead>\n", + "\t<tr><th></th><th scope=col>sample_type</th><th 
scope=col>condition</th></tr>\n", + "\t<tr><th></th><th scope=col><chr></th><th scope=col><chr></th></tr>\n", + "</thead>\n", + "<tbody>\n", + "\t<tr><th scope=row>AD_004_lesional</th><td>AD_L </td><td>AD</td></tr>\n", + "\t<tr><th scope=row>AD_004_non-lesional</th><td>AD_NL</td><td>AD</td></tr>\n", + "\t<tr><th scope=row>AD_005_lesional</th><td>AD_L </td><td>AD</td></tr>\n", + "\t<tr><th scope=row>AD_005_non-lesional</th><td>AD_NL</td><td>AD</td></tr>\n", + "\t<tr><th scope=row>AD_006_lesional</th><td>AD_L </td><td>AD</td></tr>\n", + "\t<tr><th scope=row>AD_006_non-lesional</th><td>AD_NL</td><td>AD</td></tr>\n", + "</tbody>\n", + "</table>\n" + ], + "text/latex": [ + "A data.frame: 6 × 2\n", + "\\begin{tabular}{r|ll}\n", + " & sample\\_type & condition\\\\\n", + " & <chr> & <chr>\\\\\n", + "\\hline\n", + "\tAD\\_004\\_lesional & AD\\_L & AD\\\\\n", + "\tAD\\_004\\_non-lesional & AD\\_NL & AD\\\\\n", + "\tAD\\_005\\_lesional & AD\\_L & AD\\\\\n", + "\tAD\\_005\\_non-lesional & AD\\_NL & AD\\\\\n", + "\tAD\\_006\\_lesional & AD\\_L & AD\\\\\n", + "\tAD\\_006\\_non-lesional & AD\\_NL & AD\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A data.frame: 6 × 2\n", + "\n", + "| <!--/--> | sample_type <chr> | condition <chr> |\n", + "|---|---|---|\n", + "| AD_004_lesional | AD_L | AD |\n", + "| AD_004_non-lesional | AD_NL | AD |\n", + "| AD_005_lesional | AD_L | AD |\n", + "| AD_005_non-lesional | AD_NL | AD |\n", + "| AD_006_lesional | AD_L | AD |\n", + "| AD_006_non-lesional | AD_NL | AD |\n", + "\n" + ], + "text/plain": [ + " sample_type condition\n", + "AD_004_lesional AD_L AD \n", + "AD_004_non-lesional AD_NL AD \n", + "AD_005_lesional AD_L AD \n", + "AD_005_non-lesional AD_NL AD \n", + "AD_006_lesional AD_L AD \n", + "AD_006_non-lesional AD_NL AD " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pdat_df <- data.frame(sample_type = metadata$sample_type, \n", + " condition = metadata$condition,\n", + " 
stringsAsFactors = F)\n", + "rownames(pdat_df) <- rownames(metadata)\n", + "head(pdat_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "following-sleeve", + "metadata": {}, + "outputs": [], + "source": [ + "metadata_pdat_df <- data.frame(labelDescription= c(\"lesion_nonlesion\", \"Condition\"), \n", + " row.names=c(\"sample_type\", \"condition\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "racial-modem", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<table class=\"dataframe\">\n", + "<caption>A data.frame: 2 × 1</caption>\n", + "<thead>\n", + "\t<tr><th></th><th scope=col>labelDescription</th></tr>\n", + "\t<tr><th></th><th scope=col><chr></th></tr>\n", + "</thead>\n", + "<tbody>\n", + "\t<tr><th scope=row>sample_type</th><td>lesion_nonlesion</td></tr>\n", + "\t<tr><th scope=row>condition</th><td>Condition </td></tr>\n", + "</tbody>\n", + "</table>\n" + ], + "text/latex": [ + "A data.frame: 2 × 1\n", + "\\begin{tabular}{r|l}\n", + " & labelDescription\\\\\n", + " & <chr>\\\\\n", + "\\hline\n", + "\tsample\\_type & lesion\\_nonlesion\\\\\n", + "\tcondition & Condition \\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A data.frame: 2 × 1\n", + "\n", + "| <!--/--> | labelDescription <chr> |\n", + "|---|---|\n", + "| sample_type | lesion_nonlesion |\n", + "| condition | Condition |\n", + "\n" + ], + "text/plain": [ + " labelDescription\n", + "sample_type lesion_nonlesion\n", + "condition Condition " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "metadata_pdat_df" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "pursuant-switch", + "metadata": {}, + "outputs": [], + "source": [ + "Matrix <- Matrix[ ,rownames(pdat_df)] ### the order of rownames(pdat_df) and colnames(Matrix) might be different" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "comfortable-prague", + "metadata": {}, + "outputs": [ + { + 
"data": { + "text/html": [ + "TRUE" + ], + "text/latex": [ + "TRUE" + ], + "text/markdown": [ + "TRUE" + ], + "text/plain": [ + "[1] TRUE" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "all.equal(colnames(Matrix), rownames(pdat_df))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "ruled-spencer", + "metadata": {}, + "outputs": [], + "source": [ + "bulk_eset <- ExpressionSet(\n", + " assayData = data.matrix(Matrix),\n", + " phenoData=new(\"AnnotatedDataFrame\", data = pdat_df, varMetadata = metadata_pdat_df),\n", + " featureData=new(\"AnnotatedDataFrame\", data = fdat_df, varMetadata = metadata_fdat_df))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "renewable-mentor", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ExpressionSet (storageMode: lockedEnvironment)\n", + "assayData: 31362 features, 147 samples \n", + " element names: exprs \n", + "protocolData: none\n", + "phenoData\n", + " sampleNames: AD_004_lesional AD_004_non-lesional ...\n", + " AD_037_chronic_lesion (147 total)\n", + " varLabels: sample_type condition\n", + " varMetadata: labelDescription\n", + "featureData\n", + " featureNames: 5S_rRNA 7SK ... 
snoZ5 (31362 total)\n", + " fvarLabels: gene_name\n", + " fvarMetadata: labelDescription\n", + "experimentData: use 'experimentData(object)'\n", + "Annotation: " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "bulk_eset" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "personalized-power", + "metadata": {}, + "outputs": [], + "source": [ + "saveRDS(bulk_eset, file=\"/lustre/scratch117/cellgen/team205/rl20/CTCL/deconvolution/GSE121212_readcount_ExpressionSet.rds\")" + ] + }, + { + "cell_type": "markdown", + "id": "floral-undergraduate", + "metadata": {}, + "source": [ + "### Single cell ExpressionSet" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "passive-sponsorship", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading required package: reticulate\n", + "\n", + "Attaching SeuratObject\n", + "\n", + "\n", + "Attaching package: ‘Seurat’\n", + "\n", + "\n", + "The following object is masked from ‘package:SummarizedExperiment’:\n", + "\n", + " Assays\n", + "\n", + "\n", + "\n", + "Attaching package: ‘Matrix’\n", + "\n", + "\n", + "The following object is masked from ‘package:S4Vectors’:\n", + "\n", + " expand\n", + "\n", + "\n" + ] + } + ], + "source": [ + "library(sceasy)\n", + "library(reticulate)\n", + "library(anndata)\n", + "library(Seurat)\n", + "library(BisqueRNA)\n", + "library(Biobase)\n", + "library(Matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "determined-permit", + "metadata": {}, + "outputs": [], + "source": [ + "h5ad_file <- \"/lustre/scratch126/cellgen/team205/rl20/CTCL/object_revision/All_samples_final_20240707_sub0.08_for_deconv.h5ad\"\n", + "sdata <- read_h5ad(h5ad_file)\n", + "# AnnData stores cells x genes; Seurat expects genes x cells, hence the transpose.\n", + "# Transpose the sparse matrix directly: going through as.matrix() forced a dense\n", + "# copy (R warned about a 7.3 GiB allocation) only for Seurat to store it sparse again.\n", + "seurat_object <- 
CreateSeuratObject(counts = as(Matrix::t(sdata$X), \"CsparseMatrix\"), meta.data = sdata$obs)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "criminal-manor", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "An object of class Seurat \n", + "15777 features across 62199 samples within 1 assay \n", + "Active assay: RNA (15777 features, 0 variable features)" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "seurat_object" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "beginning-tongue", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " [[ suppressing 20 column names ‘AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1’, ‘AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1’, ‘AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1’ ... ]]\n", + "\n" + ] + }, + { + "data": { + "text/plain": [ + "20 x 20 sparse Matrix of class \"dgCMatrix\"\n", + " \n", + "SAMD11 . . . . . . . . . . . . . . . . . . . .\n", + "NOC2L 1 1 . . 2 . . 1 . . . 3 1 . . . . . . .\n", + "KLHL17 . . . . . . . . . . . . . . . . . . . .\n", + "PLEKHN1 2 . . . . . . . 1 . . 1 . . . . . . . .\n", + "HES4 . . 1 . . . . . . . 2 . . . . . 1 . . .\n", + "ISG15 . . . . . . . 1 . . . . . . . . . . . .\n", + "AGRN . . . . . . . . . 1 . . . . 1 . . . . .\n", + "RNF223 . . . . . . . . . . . . . . . . . . . .\n", + "C1orf159 . . . . . . . . . . 1 . . . . . . . . .\n", + "TTLL10 . . . . . . . . . . . . . . . . . . . .\n", + "TNFRSF18 3 1 1 . 1 . . 5 . . . 1 3 . . 1 . . . .\n", + "TNFRSF4 . . . . . . . 3 . 1 . . . . . . . . . .\n", + "SDF4 6 1 5 1 1 3 . 2 . . . 3 . 2 2 1 1 . . .\n", + "B3GALT6 . . . . . . . . . . . . . . . . . . . .\n", + "C1QTNF12 . . 1 . . . . . . . . . . . 1 . . . 1 .\n", + "UBE2J2 . 1 . . 3 . . 1 . . 4 1 . 1 . . 2 . . .\n", + "SCNN1D . . 1 . . . . . . . . . . . . . . . . .\n", + "ACAP3 . . . . . 1 . . . . . . . . . . . . . .\n", + "PUSL1 . . . . . . . . 1 . . . . . . . . . . .\n", + "INTS11 3 . . . . 1 . . . . 2 . 
. 2 1 1 . . . ." + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "gene_exprs.matrix <- seurat_object@assays$RNA@counts\n", + "gene_exprs.matrix[1:20,1:20]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "marine-philosophy", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<table class=\"dataframe\">\n", + "<caption>A data.frame: 6 × 22</caption>\n", + "<thead>\n", + "\t<tr><th></th><th scope=col>orig.ident</th><th scope=col>nCount_RNA</th><th scope=col>nFeature_RNA</th><th scope=col>batch</th><th scope=col>n_counts</th><th scope=col>n_genes</th><th scope=col>percent_mito</th><th scope=col>donor</th><th scope=col>tech</th><th scope=col>sample_type</th><th scope=col>⋯</th><th scope=col>broad_ct</th><th scope=col>ctpredicted_labels</th><th scope=col>ctover_clustering</th><th scope=col>ctmajority_voting</th><th scope=col>ctconf_score</th><th scope=col>cell_type</th><th scope=col>site</th><th scope=col>tissue</th><th scope=col>Sex</th><th scope=col>type</th></tr>\n", + "\t<tr><th></th><th scope=col><fct></th><th scope=col><dbl></th><th scope=col><int></th><th scope=col><fct></th><th scope=col><dbl></th><th scope=col><dbl></th><th scope=col><dbl></th><th scope=col><fct></th><th scope=col><fct></th><th scope=col><fct></th><th scope=col>⋯</th><th scope=col><fct></th><th scope=col><fct></th><th scope=col><fct></th><th scope=col><fct></th><th scope=col><dbl></th><th scope=col><fct></th><th scope=col><fct></th><th scope=col><fct></th><th scope=col><fct></th><th scope=col><fct></th></tr>\n", + "</thead>\n", + "<tbody>\n", + "\t<tr><th scope=row>AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1</th><td>SeuratProject</td><td>14602</td><td>3142</td><td>0</td><td>27577</td><td>3474</td><td>0.0011241252</td><td>CTCL1</td><td>10x</td><td>CTCL</td><td>⋯</td><td>Keratinocytes</td><td>Differentiated_KC*</td><td>286</td><td>Differentiated_KC </td><td>0.96744886</td><td>Differentiated_KC 
</td><td>lesion</td><td>Epidermis</td><td>Female</td><td>NA</td></tr>\n", + "\t<tr><th scope=row>AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1</th><td>SeuratProject</td><td>13463</td><td>3311</td><td>0</td><td>21021</td><td>3642</td><td>0.0004281433</td><td>CTCL1</td><td>10x</td><td>CTCL</td><td>⋯</td><td>Keratinocytes</td><td>Differentiated_KC*</td><td>492</td><td>Differentiated_KC*</td><td>0.13851590</td><td>Differentiated_KC*</td><td>lesion</td><td>Epidermis</td><td>Female</td><td>NA</td></tr>\n", + "\t<tr><th scope=row>AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1</th><td>SeuratProject</td><td>14551</td><td>3472</td><td>0</td><td>27837</td><td>3824</td><td>0.0024787153</td><td>CTCL1</td><td>10x</td><td>CTCL</td><td>⋯</td><td>Keratinocytes</td><td>Differentiated_KC*</td><td>535</td><td>Differentiated_KC </td><td>0.01321663</td><td>Differentiated_KC </td><td>lesion</td><td>Epidermis</td><td>Female</td><td>NA</td></tr>\n", + "\t<tr><th scope=row>AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1</th><td>SeuratProject</td><td>11215</td><td>2632</td><td>0</td><td>17545</td><td>2943</td><td>0.0056426334</td><td>CTCL1</td><td>10x</td><td>CTCL</td><td>⋯</td><td>Keratinocytes</td><td>Differentiated_KC*</td><td>298</td><td>Differentiated_KC*</td><td>0.90835925</td><td>Differentiated_KC*</td><td>lesion</td><td>Epidermis</td><td>Female</td><td>NA</td></tr>\n", + "\t<tr><th scope=row>AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1</th><td>SeuratProject</td><td> 6150</td><td>2384</td><td>0</td><td> 9136</td><td>2665</td><td>0.0010945710</td><td>CTCL1</td><td>10x</td><td>CTCL</td><td>⋯</td><td>Keratinocytes</td><td>VE2 </td><td>368</td><td>Differentiated_KC*</td><td>0.04688790</td><td>Differentiated_KC*</td><td>lesion</td><td>Epidermis</td><td>Female</td><td>NA</td></tr>\n", + "\t<tr><th scope=row>AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1</th><td>SeuratProject</td><td> 
5296</td><td>1860</td><td>0</td><td>11803</td><td>2101</td><td>0.0155045325</td><td>CTCL1</td><td>10x</td><td>CTCL</td><td>⋯</td><td>Keratinocytes</td><td>Differentiated_KC </td><td>438</td><td>Differentiated_KC </td><td>0.67155350</td><td>Differentiated_KC </td><td>lesion</td><td>Epidermis</td><td>Female</td><td>NA</td></tr>\n", + "</tbody>\n", + "</table>\n" + ], + "text/latex": [ + "A data.frame: 6 × 22\n", + "\\begin{tabular}{r|lllllllllllllllllllll}\n", + " & orig.ident & nCount\\_RNA & nFeature\\_RNA & batch & n\\_counts & n\\_genes & percent\\_mito & donor & tech & sample\\_type & ⋯ & broad\\_ct & ctpredicted\\_labels & ctover\\_clustering & ctmajority\\_voting & ctconf\\_score & cell\\_type & site & tissue & Sex & type\\\\\n", + " & <fct> & <dbl> & <int> & <fct> & <dbl> & <dbl> & <dbl> & <fct> & <fct> & <fct> & ⋯ & <fct> & <fct> & <fct> & <fct> & <dbl> & <fct> & <fct> & <fct> & <fct> & <fct>\\\\\n", + "\\hline\n", + "\tAAACCTGCATCACAAC-0\\_CTCL1\\_CTCL1\\_CTCL1 & SeuratProject & 14602 & 3142 & 0 & 27577 & 3474 & 0.0011241252 & CTCL1 & 10x & CTCL & ⋯ & Keratinocytes & Differentiated\\_KC* & 286 & Differentiated\\_KC & 0.96744886 & Differentiated\\_KC & lesion & Epidermis & Female & NA\\\\\n", + "\tAAACCTGCATGTAAGA-0\\_CTCL1\\_CTCL1\\_CTCL1 & SeuratProject & 13463 & 3311 & 0 & 21021 & 3642 & 0.0004281433 & CTCL1 & 10x & CTCL & ⋯ & Keratinocytes & Differentiated\\_KC* & 492 & Differentiated\\_KC* & 0.13851590 & Differentiated\\_KC* & lesion & Epidermis & Female & NA\\\\\n", + "\tAAACGGGGTCGACTGC-0\\_CTCL1\\_CTCL1\\_CTCL1 & SeuratProject & 14551 & 3472 & 0 & 27837 & 3824 & 0.0024787153 & CTCL1 & 10x & CTCL & ⋯ & Keratinocytes & Differentiated\\_KC* & 535 & Differentiated\\_KC & 0.01321663 & Differentiated\\_KC & lesion & Epidermis & Female & NA\\\\\n", + "\tAAAGCAAAGCGTAATA-0\\_CTCL1\\_CTCL1\\_CTCL1 & SeuratProject & 11215 & 2632 & 0 & 17545 & 2943 & 0.0056426334 & CTCL1 & 10x & CTCL & ⋯ & Keratinocytes & Differentiated\\_KC* & 298 & Differentiated\\_KC* & 
0.90835925 & Differentiated\\_KC* & lesion & Epidermis & Female & NA\\\\\n", + "\tAAAGCAAGTCCTGCTT-0\\_CTCL1\\_CTCL1\\_CTCL1 & SeuratProject & 6150 & 2384 & 0 & 9136 & 2665 & 0.0010945710 & CTCL1 & 10x & CTCL & ⋯ & Keratinocytes & VE2 & 368 & Differentiated\\_KC* & 0.04688790 & Differentiated\\_KC* & lesion & Epidermis & Female & NA\\\\\n", + "\tAAAGCAATCCCATTTA-0\\_CTCL1\\_CTCL1\\_CTCL1 & SeuratProject & 5296 & 1860 & 0 & 11803 & 2101 & 0.0155045325 & CTCL1 & 10x & CTCL & ⋯ & Keratinocytes & Differentiated\\_KC & 438 & Differentiated\\_KC & 0.67155350 & Differentiated\\_KC & lesion & Epidermis & Female & NA\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A data.frame: 6 × 22\n", + "\n", + "| <!--/--> | orig.ident <fct> | nCount_RNA <dbl> | nFeature_RNA <int> | batch <fct> | n_counts <dbl> | n_genes <dbl> | percent_mito <dbl> | donor <fct> | tech <fct> | sample_type <fct> | ⋯ ⋯ | broad_ct <fct> | ctpredicted_labels <fct> | ctover_clustering <fct> | ctmajority_voting <fct> | ctconf_score <dbl> | cell_type <fct> | site <fct> | tissue <fct> | Sex <fct> | type <fct> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 | SeuratProject | 14602 | 3142 | 0 | 27577 | 3474 | 0.0011241252 | CTCL1 | 10x | CTCL | ⋯ | Keratinocytes | Differentiated_KC* | 286 | Differentiated_KC | 0.96744886 | Differentiated_KC | lesion | Epidermis | Female | NA |\n", + "| AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 | SeuratProject | 13463 | 3311 | 0 | 21021 | 3642 | 0.0004281433 | CTCL1 | 10x | CTCL | ⋯ | Keratinocytes | Differentiated_KC* | 492 | Differentiated_KC* | 0.13851590 | Differentiated_KC* | lesion | Epidermis | Female | NA |\n", + "| AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 | SeuratProject | 14551 | 3472 | 0 | 27837 | 3824 | 0.0024787153 | CTCL1 | 10x | CTCL | ⋯ | Keratinocytes | Differentiated_KC* | 535 | Differentiated_KC | 0.01321663 | Differentiated_KC | lesion | Epidermis | Female | 
NA |\n", + "| AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 | SeuratProject | 11215 | 2632 | 0 | 17545 | 2943 | 0.0056426334 | CTCL1 | 10x | CTCL | ⋯ | Keratinocytes | Differentiated_KC* | 298 | Differentiated_KC* | 0.90835925 | Differentiated_KC* | lesion | Epidermis | Female | NA |\n", + "| AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 | SeuratProject | 6150 | 2384 | 0 | 9136 | 2665 | 0.0010945710 | CTCL1 | 10x | CTCL | ⋯ | Keratinocytes | VE2 | 368 | Differentiated_KC* | 0.04688790 | Differentiated_KC* | lesion | Epidermis | Female | NA |\n", + "| AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 | SeuratProject | 5296 | 1860 | 0 | 11803 | 2101 | 0.0155045325 | CTCL1 | 10x | CTCL | ⋯ | Keratinocytes | Differentiated_KC | 438 | Differentiated_KC | 0.67155350 | Differentiated_KC | lesion | Epidermis | Female | NA |\n", + "\n" + ], + "text/plain": [ + " orig.ident nCount_RNA nFeature_RNA\n", + "AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 SeuratProject 14602 3142 \n", + "AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 SeuratProject 13463 3311 \n", + "AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 SeuratProject 14551 3472 \n", + "AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 SeuratProject 11215 2632 \n", + "AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 SeuratProject 6150 2384 \n", + "AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 SeuratProject 5296 1860 \n", + " batch n_counts n_genes percent_mito donor\n", + "AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 0 27577 3474 0.0011241252 CTCL1\n", + "AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 0 21021 3642 0.0004281433 CTCL1\n", + "AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 0 27837 3824 0.0024787153 CTCL1\n", + "AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 0 17545 2943 0.0056426334 CTCL1\n", + "AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 0 9136 2665 0.0010945710 CTCL1\n", + "AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 0 11803 2101 0.0155045325 CTCL1\n", + " tech sample_type ⋯ broad_ct \n", + "AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 10x CTCL ⋯ Keratinocytes\n", + "AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 10x CTCL ⋯ Keratinocytes\n", + 
"AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 10x CTCL ⋯ Keratinocytes\n", + "AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 10x CTCL ⋯ Keratinocytes\n", + "AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 10x CTCL ⋯ Keratinocytes\n", + "AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 10x CTCL ⋯ Keratinocytes\n", + " ctpredicted_labels ctover_clustering\n", + "AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 286 \n", + "AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 492 \n", + "AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 535 \n", + "AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 298 \n", + "AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 VE2 368 \n", + "AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC 438 \n", + " ctmajority_voting ctconf_score\n", + "AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 Differentiated_KC 0.96744886 \n", + "AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 0.13851590 \n", + "AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 Differentiated_KC 0.01321663 \n", + "AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 0.90835925 \n", + "AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 0.04688790 \n", + "AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC 0.67155350 \n", + " cell_type site tissue Sex \n", + "AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 Differentiated_KC lesion Epidermis Female\n", + "AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* lesion Epidermis Female\n", + "AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 Differentiated_KC lesion Epidermis Female\n", + "AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* lesion Epidermis Female\n", + "AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* lesion Epidermis Female\n", + "AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC lesion Epidermis Female\n", + " type\n", + "AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 NA \n", + "AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 NA \n", + "AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 NA \n", + "AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 NA \n", + "AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 NA 
\n", + "AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 NA " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "head(seurat_object@meta.data)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "sacred-retreat", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<table class=\"dataframe\">\n", + "<caption>A data.frame: 6 × 2</caption>\n", + "<thead>\n", + "\t<tr><th></th><th scope=col>donor</th><th scope=col>cell_type</th></tr>\n", + "\t<tr><th></th><th scope=col><fct></th><th scope=col><fct></th></tr>\n", + "</thead>\n", + "<tbody>\n", + "\t<tr><th scope=row>AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1</th><td>CTCL1</td><td>Differentiated_KC </td></tr>\n", + "\t<tr><th scope=row>AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1</th><td>CTCL1</td><td>Differentiated_KC*</td></tr>\n", + "\t<tr><th scope=row>AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1</th><td>CTCL1</td><td>Differentiated_KC </td></tr>\n", + "\t<tr><th scope=row>AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1</th><td>CTCL1</td><td>Differentiated_KC*</td></tr>\n", + "\t<tr><th scope=row>AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1</th><td>CTCL1</td><td>Differentiated_KC*</td></tr>\n", + "\t<tr><th scope=row>AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1</th><td>CTCL1</td><td>Differentiated_KC </td></tr>\n", + "</tbody>\n", + "</table>\n" + ], + "text/latex": [ + "A data.frame: 6 × 2\n", + "\\begin{tabular}{r|ll}\n", + " & donor & cell\\_type\\\\\n", + " & <fct> & <fct>\\\\\n", + "\\hline\n", + "\tAAACCTGCATCACAAC-0\\_CTCL1\\_CTCL1\\_CTCL1 & CTCL1 & Differentiated\\_KC \\\\\n", + "\tAAACCTGCATGTAAGA-0\\_CTCL1\\_CTCL1\\_CTCL1 & CTCL1 & Differentiated\\_KC*\\\\\n", + "\tAAACGGGGTCGACTGC-0\\_CTCL1\\_CTCL1\\_CTCL1 & CTCL1 & Differentiated\\_KC \\\\\n", + "\tAAAGCAAAGCGTAATA-0\\_CTCL1\\_CTCL1\\_CTCL1 & CTCL1 & Differentiated\\_KC*\\\\\n", + "\tAAAGCAAGTCCTGCTT-0\\_CTCL1\\_CTCL1\\_CTCL1 & CTCL1 & Differentiated\\_KC*\\\\\n", + "\tAAAGCAATCCCATTTA-0\\_CTCL1\\_CTCL1\\_CTCL1 & CTCL1 & Differentiated\\_KC 
\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A data.frame: 6 × 2\n", + "\n", + "| <!--/--> | donor <fct> | cell_type <fct> |\n", + "|---|---|---|\n", + "| AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 | CTCL1 | Differentiated_KC |\n", + "| AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 | CTCL1 | Differentiated_KC* |\n", + "| AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 | CTCL1 | Differentiated_KC |\n", + "| AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 | CTCL1 | Differentiated_KC* |\n", + "| AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 | CTCL1 | Differentiated_KC* |\n", + "| AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 | CTCL1 | Differentiated_KC |\n", + "\n" + ], + "text/plain": [ + " donor cell_type \n", + "AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 CTCL1 Differentiated_KC \n", + "AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 CTCL1 Differentiated_KC*\n", + "AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 CTCL1 Differentiated_KC \n", + "AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 CTCL1 Differentiated_KC*\n", + "AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 CTCL1 Differentiated_KC*\n", + "AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 CTCL1 Differentiated_KC " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Per-cell annotations that become the ExpressionSet phenoData: one row per cell barcode,\n", + "# keeping only the donor and the cell-type label.\n", + "#pheno.matrix <- seurat_object@meta.data[,c(\"donor_id\",\"broad_cell_type\",\"anno\")]\n", + "pheno.matrix <- seurat_object@meta.data[,c(\"donor\",\"cell_type\")]\n", + "head(pheno.matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "surface-nevada", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<table class=\"dataframe\">\n", + "<caption>A data.frame: 2 × 1</caption>\n", + "<thead>\n", + "\t<tr><th></th><th scope=col>labelDescription</th></tr>\n", + "\t<tr><th></th><th scope=col><chr></th></tr>\n", + "</thead>\n", + "<tbody>\n", + "\t<tr><th scope=row>donor</th><td>Donor ID </td></tr>\n", + "\t<tr><th scope=row>cell_type</th><td>Cell type</td></tr>\n", + "</tbody>\n", + "</table>\n" + ], + "text/latex": [ + "A data.frame: 2 × 1\n", + 
"\\begin{tabular}{r|l}\n", + " & labelDescription\\\\\n", + " & <chr>\\\\\n", + "\\hline\n", + "\tdonor & Donor ID \\\\\n", + "\tcell\\_type & Cell type\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A data.frame: 2 × 1\n", + "\n", + "| <!--/--> | labelDescription <chr> |\n", + "|---|---|\n", + "| donor | Donor ID |\n", + "| cell_type | Cell type |\n", + "\n" + ], + "text/plain": [ + " labelDescription\n", + "donor Donor ID \n", + "cell_type Cell type " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# varMetadata for the AnnotatedDataFrame: one row per phenoData column, named after that column.\n", + "metadata <- data.frame(labelDescription= c(\"Donor ID\", \"Cell type\"), \n", + " row.names=c(\"donor\", \"cell_type\"))\n", + "metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "deadly-venture", + "metadata": {}, + "outputs": [], + "source": [ + "library(Matrix)\n", + "# Keep only features (rows) with at least one non-zero count in any cell.\n", + "# Matrix::rowSums() yields exactly one value per row, so the mask always has nrow() entries.\n", + "# The earlier tabulate(summary(m)$i) mask was only max(i) long: whenever the last rows were\n", + "# all zero it came out shorter than nrow() and no longer lined up with the rows; it also\n", + "# treated explicitly stored zeros as counts.\n", + "has_counts <- Matrix::rowSums(gene_exprs.matrix != 0) > 0\n", + "gene_exprs.matrix <- gene_exprs.matrix[has_counts, , drop = FALSE]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "medical-settlement", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<style>\n", + ".list-inline {list-style: none; margin:0; padding: 0}\n", + ".list-inline>li {display: inline-block}\n", + ".list-inline>li:not(:last-child)::after {content: \"\\00b7\"; padding: 0 .5ex}\n", + "</style>\n", + "<ol class=list-inline><li>15767</li><li>62199</li></ol>\n" + ], + "text/latex": [ + "\\begin{enumerate*}\n", + "\\item 15767\n", + "\\item 62199\n", + "\\end{enumerate*}\n" + ], + "text/markdown": [ + "1. 15767\n", + "2. 
62199\n", + "\n", + "\n" + ], + "text/plain": [ + "[1] 15767 62199" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Shape of the filtered matrix: features (rows) x cells (columns).\n", + "dim(gene_exprs.matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "dated-injection", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "n_slices=1\n", + "\n", + "converting slice 1/1\n", + "\n", + "columns 1:55815\n", + "\n", + "cbind dense submatrices\n", + "\n" + ] + } + ], + "source": [ + "# Optional dense conversion via SCOPfunctions; expr_matrix_den is only referenced by the\n", + "# commented-out ExpressionSet call in the next cell.\n", + "# NOTE(review): saved output comes from an out-of-order run (execution_count 28) and reports\n", + "# columns 1:55815, not the 62199 columns shown by dim() -- re-run or drop this cell.\n", + "expr_matrix_den <- SCOPfunctions::utils_big_as.matrix(gene_exprs.matrix, n_slices_init = 1, verbose = T) ## for large matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "conscious-terrorist", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Warning message in asMethod(object):\n", + "“sparse->dense coercion: allocating vector of size 7.3 GiB”\n" + ] + } + ], + "source": [ + "# Assemble the single-cell ExpressionSet: dense expression values as assayData, the per-cell\n", + "# annotations (pheno.matrix) plus their column descriptions (metadata) as phenoData.\n", + "# The sparse->dense coercion is large; the saved warning reports a 7.3 GiB allocation.\n", + "SC.eset = ExpressionSet(assayData = data.matrix(gene_exprs.matrix), \n", + " phenoData = new(\"AnnotatedDataFrame\", data = pheno.matrix, varMetadata = metadata))\n", + "#SC.eset = ExpressionSet(assayData = expr_matrix_den, \n", + "# phenoData = new(\"AnnotatedDataFrame\", data = pheno.matrix, varMetadata = metadata)) ## for large matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "final-hardwood", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ExpressionSet (storageMode: lockedEnvironment)\n", + "assayData: 15767 features, 62199 samples \n", + " element names: exprs \n", + "protocolData: none\n", + "phenoData\n", + " sampleNames: AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1\n", + " AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 ... 
GACGTGCTCACATACG-92_S5\n", + " (62199 total)\n", + " varLabels: donor cell_type\n", + " varMetadata: labelDescription\n", + "featureData: none\n", + "experimentData: use 'experimentData(object)'\n", + "Annotation: " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "SC.eset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "minimal-raising", + "metadata": {}, + "outputs": [], + "source": [ + "# Write the ExpressionSet to an .rds file.\n", + "# NOTE(review): hard-coded absolute cluster path, and this cell has no execution_count in the\n", + "# saved notebook -- confirm the file was actually written from this object.\n", + "saveRDS(SC.eset, file=\"/lustre/scratch126/cellgen/team205/rl20/CTCL/object_revision/All_samples_final_20240707_sub0.08_for_deconv_ExpressionSet.rds\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "enormous-helping", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "4.0.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}