1275 lines (1274 with data), 56.2 kB
{
"cells": [
{
"cell_type": "markdown",
"id": "single-replacement",
"metadata": {},
"source": [
"### Bulk ExpressionSet"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "abroad-circle",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading required package: GenomicRanges\n",
"\n",
"Loading required package: stats4\n",
"\n",
"Loading required package: BiocGenerics\n",
"\n",
"Loading required package: parallel\n",
"\n",
"\n",
"Attaching package: ‘BiocGenerics’\n",
"\n",
"\n",
"The following objects are masked from ‘package:parallel’:\n",
"\n",
" clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,\n",
" clusterExport, clusterMap, parApply, parCapply, parLapply,\n",
" parLapplyLB, parRapply, parSapply, parSapplyLB\n",
"\n",
"\n",
"The following objects are masked from ‘package:stats’:\n",
"\n",
" IQR, mad, sd, var, xtabs\n",
"\n",
"\n",
"The following objects are masked from ‘package:base’:\n",
"\n",
" anyDuplicated, append, as.data.frame, basename, cbind, colnames,\n",
" dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,\n",
" grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,\n",
" order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,\n",
" rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,\n",
" union, unique, unsplit, which, which.max, which.min\n",
"\n",
"\n",
"Loading required package: S4Vectors\n",
"\n",
"\n",
"Attaching package: ‘S4Vectors’\n",
"\n",
"\n",
"The following object is masked from ‘package:base’:\n",
"\n",
" expand.grid\n",
"\n",
"\n",
"Loading required package: IRanges\n",
"\n",
"Loading required package: GenomeInfoDb\n",
"\n",
"Loading required package: Biobase\n",
"\n",
"Welcome to Bioconductor\n",
"\n",
" Vignettes contain introductory material; view with\n",
" 'browseVignettes()'. To cite Bioconductor, see\n",
" 'citation(\"Biobase\")', and for packages 'citation(\"pkgname\")'.\n",
"\n",
"\n",
"Loading required package: DelayedArray\n",
"\n",
"Loading required package: matrixStats\n",
"\n",
"\n",
"Attaching package: ‘matrixStats’\n",
"\n",
"\n",
"The following objects are masked from ‘package:Biobase’:\n",
"\n",
" anyMissing, rowMedians\n",
"\n",
"\n",
"\n",
"Attaching package: ‘DelayedArray’\n",
"\n",
"\n",
"The following objects are masked from ‘package:matrixStats’:\n",
"\n",
" colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges\n",
"\n",
"\n",
"The following objects are masked from ‘package:base’:\n",
"\n",
" aperm, apply, rowsum\n",
"\n",
"\n"
]
}
],
"source": [
"library(SummarizedExperiment)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "experimental-disability",
"metadata": {},
"outputs": [],
"source": [
"Matrix <- read.table(\"/lustre/scratch117/cellgen/team205/rl20/CTCL/deconvolution/GSE121212_readcount_rmdup.txt\", \n",
" check.names=F) ### count matrix"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "graduate-mining",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"dataframe\">\n",
"<caption>A data.frame: 6 × 147</caption>\n",
"<thead>\n",
"\t<tr><th></th><th scope=col>AD_004_lesional</th><th scope=col>AD_004_non-lesional</th><th scope=col>AD_005_lesional</th><th scope=col>AD_005_non-lesional</th><th scope=col>AD_006_lesional</th><th scope=col>AD_006_non-lesional</th><th scope=col>AD_007_lesional</th><th scope=col>AD_007_non-lesional</th><th scope=col>AD_009_lesional</th><th scope=col>AD_009_non-lesional</th><th scope=col>⋯</th><th scope=col>AD_033_chronic_lesion</th><th scope=col>AD_034_non-lesional</th><th scope=col>AD_034_chronic_lesion</th><th scope=col>AD_032_chronic_lesion</th><th scope=col>AD_035_non-lesional</th><th scope=col>AD_035_chronic_lesion</th><th scope=col>AD_036_non-lesional</th><th scope=col>AD_036_chronic_lesion</th><th scope=col>AD_037_non-lesional</th><th scope=col>AD_037_chronic_lesion</th></tr>\n",
"\t<tr><th></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col>⋯</th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th><th scope=col><int></th></tr>\n",
"</thead>\n",
"<tbody>\n",
"\t<tr><th scope=row>5S_rRNA</th><td> 0</td><td> 3</td><td> 9</td><td> 12</td><td> 2</td><td> 3</td><td> 6</td><td> 4</td><td> 2</td><td> 5</td><td>⋯</td><td> 1</td><td> 2</td><td> 3</td><td> 3</td><td> 3</td><td> 4</td><td> 3</td><td> 5</td><td> 6</td><td> 2</td></tr>\n",
"\t<tr><th scope=row>7SK</th><td> 9</td><td> 6</td><td> 25</td><td> 25</td><td> 7</td><td> 8</td><td> 9</td><td> 8</td><td> 6</td><td> 15</td><td>⋯</td><td> 3</td><td> 8</td><td> 2</td><td> 5</td><td> 12</td><td> 5</td><td> 7</td><td> 5</td><td> 8</td><td> 3</td></tr>\n",
"\t<tr><th scope=row>A1BG</th><td> 2</td><td> 0</td><td> 4</td><td> 3</td><td> 1</td><td> 1</td><td> 0</td><td> 0</td><td> 4</td><td> 0</td><td>⋯</td><td> 1</td><td> 1</td><td> 2</td><td> 0</td><td> 1</td><td> 2</td><td> 0</td><td> 0</td><td> 1</td><td> 1</td></tr>\n",
"\t<tr><th scope=row>A1BG-AS1</th><td> 5</td><td> 0</td><td> 11</td><td> 8</td><td> 7</td><td> 8</td><td> 13</td><td> 2</td><td> 6</td><td> 19</td><td>⋯</td><td> 4</td><td> 0</td><td> 7</td><td> 7</td><td> 4</td><td> 4</td><td> 10</td><td> 13</td><td> 8</td><td> 4</td></tr>\n",
"\t<tr><th scope=row>A1CF</th><td> 2</td><td> 3</td><td> 2</td><td> 8</td><td> 1</td><td> 2</td><td> 0</td><td> 5</td><td> 1</td><td> 5</td><td>⋯</td><td> 3</td><td> 0</td><td> 1</td><td> 0</td><td> 0</td><td> 2</td><td> 0</td><td> 0</td><td> 1</td><td> 0</td></tr>\n",
"\t<tr><th scope=row>A2M</th><td>308</td><td>353</td><td>2544</td><td>1366</td><td>978</td><td>406</td><td>2761</td><td>928</td><td>2796</td><td>1891</td><td>⋯</td><td>1463</td><td>1391</td><td>1714</td><td>1998</td><td>683</td><td>1154</td><td>2361</td><td>2374</td><td>1040</td><td>2755</td></tr>\n",
"</tbody>\n",
"</table>\n"
],
"text/latex": [
"A data.frame: 6 × 147\n",
"\\begin{tabular}{r|lllllllllllllllllllll}\n",
" & AD\\_004\\_lesional & AD\\_004\\_non-lesional & AD\\_005\\_lesional & AD\\_005\\_non-lesional & AD\\_006\\_lesional & AD\\_006\\_non-lesional & AD\\_007\\_lesional & AD\\_007\\_non-lesional & AD\\_009\\_lesional & AD\\_009\\_non-lesional & ⋯ & AD\\_033\\_chronic\\_lesion & AD\\_034\\_non-lesional & AD\\_034\\_chronic\\_lesion & AD\\_032\\_chronic\\_lesion & AD\\_035\\_non-lesional & AD\\_035\\_chronic\\_lesion & AD\\_036\\_non-lesional & AD\\_036\\_chronic\\_lesion & AD\\_037\\_non-lesional & AD\\_037\\_chronic\\_lesion\\\\\n",
" & <int> & <int> & <int> & <int> & <int> & <int> & <int> & <int> & <int> & <int> & ⋯ & <int> & <int> & <int> & <int> & <int> & <int> & <int> & <int> & <int> & <int>\\\\\n",
"\\hline\n",
"\t5S\\_rRNA & 0 & 3 & 9 & 12 & 2 & 3 & 6 & 4 & 2 & 5 & ⋯ & 1 & 2 & 3 & 3 & 3 & 4 & 3 & 5 & 6 & 2\\\\\n",
"\t7SK & 9 & 6 & 25 & 25 & 7 & 8 & 9 & 8 & 6 & 15 & ⋯ & 3 & 8 & 2 & 5 & 12 & 5 & 7 & 5 & 8 & 3\\\\\n",
"\tA1BG & 2 & 0 & 4 & 3 & 1 & 1 & 0 & 0 & 4 & 0 & ⋯ & 1 & 1 & 2 & 0 & 1 & 2 & 0 & 0 & 1 & 1\\\\\n",
"\tA1BG-AS1 & 5 & 0 & 11 & 8 & 7 & 8 & 13 & 2 & 6 & 19 & ⋯ & 4 & 0 & 7 & 7 & 4 & 4 & 10 & 13 & 8 & 4\\\\\n",
"\tA1CF & 2 & 3 & 2 & 8 & 1 & 2 & 0 & 5 & 1 & 5 & ⋯ & 3 & 0 & 1 & 0 & 0 & 2 & 0 & 0 & 1 & 0\\\\\n",
"\tA2M & 308 & 353 & 2544 & 1366 & 978 & 406 & 2761 & 928 & 2796 & 1891 & ⋯ & 1463 & 1391 & 1714 & 1998 & 683 & 1154 & 2361 & 2374 & 1040 & 2755\\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"A data.frame: 6 × 147\n",
"\n",
"| <!--/--> | AD_004_lesional <int> | AD_004_non-lesional <int> | AD_005_lesional <int> | AD_005_non-lesional <int> | AD_006_lesional <int> | AD_006_non-lesional <int> | AD_007_lesional <int> | AD_007_non-lesional <int> | AD_009_lesional <int> | AD_009_non-lesional <int> | ⋯ ⋯ | AD_033_chronic_lesion <int> | AD_034_non-lesional <int> | AD_034_chronic_lesion <int> | AD_032_chronic_lesion <int> | AD_035_non-lesional <int> | AD_035_chronic_lesion <int> | AD_036_non-lesional <int> | AD_036_chronic_lesion <int> | AD_037_non-lesional <int> | AD_037_chronic_lesion <int> |\n",
"|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
"| 5S_rRNA | 0 | 3 | 9 | 12 | 2 | 3 | 6 | 4 | 2 | 5 | ⋯ | 1 | 2 | 3 | 3 | 3 | 4 | 3 | 5 | 6 | 2 |\n",
"| 7SK | 9 | 6 | 25 | 25 | 7 | 8 | 9 | 8 | 6 | 15 | ⋯ | 3 | 8 | 2 | 5 | 12 | 5 | 7 | 5 | 8 | 3 |\n",
"| A1BG | 2 | 0 | 4 | 3 | 1 | 1 | 0 | 0 | 4 | 0 | ⋯ | 1 | 1 | 2 | 0 | 1 | 2 | 0 | 0 | 1 | 1 |\n",
"| A1BG-AS1 | 5 | 0 | 11 | 8 | 7 | 8 | 13 | 2 | 6 | 19 | ⋯ | 4 | 0 | 7 | 7 | 4 | 4 | 10 | 13 | 8 | 4 |\n",
"| A1CF | 2 | 3 | 2 | 8 | 1 | 2 | 0 | 5 | 1 | 5 | ⋯ | 3 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 1 | 0 |\n",
"| A2M | 308 | 353 | 2544 | 1366 | 978 | 406 | 2761 | 928 | 2796 | 1891 | ⋯ | 1463 | 1391 | 1714 | 1998 | 683 | 1154 | 2361 | 2374 | 1040 | 2755 |\n",
"\n"
],
"text/plain": [
" AD_004_lesional AD_004_non-lesional AD_005_lesional\n",
"5S_rRNA 0 3 9 \n",
"7SK 9 6 25 \n",
"A1BG 2 0 4 \n",
"A1BG-AS1 5 0 11 \n",
"A1CF 2 3 2 \n",
"A2M 308 353 2544 \n",
" AD_005_non-lesional AD_006_lesional AD_006_non-lesional\n",
"5S_rRNA 12 2 3 \n",
"7SK 25 7 8 \n",
"A1BG 3 1 1 \n",
"A1BG-AS1 8 7 8 \n",
"A1CF 8 1 2 \n",
"A2M 1366 978 406 \n",
" AD_007_lesional AD_007_non-lesional AD_009_lesional\n",
"5S_rRNA 6 4 2 \n",
"7SK 9 8 6 \n",
"A1BG 0 0 4 \n",
"A1BG-AS1 13 2 6 \n",
"A1CF 0 5 1 \n",
"A2M 2761 928 2796 \n",
" AD_009_non-lesional ⋯ AD_033_chronic_lesion AD_034_non-lesional\n",
"5S_rRNA 5 ⋯ 1 2 \n",
"7SK 15 ⋯ 3 8 \n",
"A1BG 0 ⋯ 1 1 \n",
"A1BG-AS1 19 ⋯ 4 0 \n",
"A1CF 5 ⋯ 3 0 \n",
"A2M 1891 ⋯ 1463 1391 \n",
" AD_034_chronic_lesion AD_032_chronic_lesion AD_035_non-lesional\n",
"5S_rRNA 3 3 3 \n",
"7SK 2 5 12 \n",
"A1BG 2 0 1 \n",
"A1BG-AS1 7 7 4 \n",
"A1CF 1 0 0 \n",
"A2M 1714 1998 683 \n",
" AD_035_chronic_lesion AD_036_non-lesional AD_036_chronic_lesion\n",
"5S_rRNA 4 3 5 \n",
"7SK 5 7 5 \n",
"A1BG 2 0 0 \n",
"A1BG-AS1 4 10 13 \n",
"A1CF 2 0 0 \n",
"A2M 1154 2361 2374 \n",
" AD_037_non-lesional AD_037_chronic_lesion\n",
"5S_rRNA 6 2 \n",
"7SK 8 3 \n",
"A1BG 1 1 \n",
"A1BG-AS1 8 4 \n",
"A1CF 1 0 \n",
"A2M 1040 2755 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<style>\n",
".list-inline {list-style: none; margin:0; padding: 0}\n",
".list-inline>li {display: inline-block}\n",
".list-inline>li:not(:last-child)::after {content: \"\\00b7\"; padding: 0 .5ex}\n",
"</style>\n",
"<ol class=list-inline><li>31362</li><li>147</li></ol>\n"
],
"text/latex": [
"\\begin{enumerate*}\n",
"\\item 31362\n",
"\\item 147\n",
"\\end{enumerate*}\n"
],
"text/markdown": [
"1. 31362\n",
"2. 147\n",
"\n",
"\n"
],
"text/plain": [
"[1] 31362 147"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"head(Matrix);dim(Matrix)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "jewish-sessions",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>\n",
".list-inline {list-style: none; margin:0; padding: 0}\n",
".list-inline>li {display: inline-block}\n",
".list-inline>li:not(:last-child)::after {content: \"\\00b7\"; padding: 0 .5ex}\n",
"</style>\n",
"<ol class=list-inline><li>31362</li><li>147</li></ol>\n"
],
"text/latex": [
"\\begin{enumerate*}\n",
"\\item 31362\n",
"\\item 147\n",
"\\end{enumerate*}\n"
],
"text/markdown": [
"1. 31362\n",
"2. 147\n",
"\n",
"\n"
],
"text/plain": [
"[1] 31362 147"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"is_genename_uniq <- !duplicated(rownames(Matrix))\n",
"Matrix <- Matrix[is_genename_uniq, ]\n",
"dim(Matrix)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "professional-pattern",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [],
"text/latex": [],
"text/markdown": [],
"text/plain": [
"named integer(0)"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"tt <- apply(Matrix, 2, sum)\n",
"del <- which(tt==0)\n",
"del"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "excess-salvation",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"dataframe\">\n",
"<caption>A data.frame: 6 × 1</caption>\n",
"<thead>\n",
"\t<tr><th></th><th scope=col>gene_name</th></tr>\n",
"\t<tr><th></th><th scope=col><chr></th></tr>\n",
"</thead>\n",
"<tbody>\n",
"\t<tr><th scope=row>5S_rRNA</th><td>5S_rRNA </td></tr>\n",
"\t<tr><th scope=row>7SK</th><td>7SK </td></tr>\n",
"\t<tr><th scope=row>A1BG</th><td>A1BG </td></tr>\n",
"\t<tr><th scope=row>A1BG-AS1</th><td>A1BG-AS1</td></tr>\n",
"\t<tr><th scope=row>A1CF</th><td>A1CF </td></tr>\n",
"\t<tr><th scope=row>A2M</th><td>A2M </td></tr>\n",
"</tbody>\n",
"</table>\n"
],
"text/latex": [
"A data.frame: 6 × 1\n",
"\\begin{tabular}{r|l}\n",
" & gene\\_name\\\\\n",
" & <chr>\\\\\n",
"\\hline\n",
"\t5S\\_rRNA & 5S\\_rRNA \\\\\n",
"\t7SK & 7SK \\\\\n",
"\tA1BG & A1BG \\\\\n",
"\tA1BG-AS1 & A1BG-AS1\\\\\n",
"\tA1CF & A1CF \\\\\n",
"\tA2M & A2M \\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"A data.frame: 6 × 1\n",
"\n",
"| <!--/--> | gene_name <chr> |\n",
"|---|---|\n",
"| 5S_rRNA | 5S_rRNA |\n",
"| 7SK | 7SK |\n",
"| A1BG | A1BG |\n",
"| A1BG-AS1 | A1BG-AS1 |\n",
"| A1CF | A1CF |\n",
"| A2M | A2M |\n",
"\n"
],
"text/plain": [
" gene_name\n",
"5S_rRNA 5S_rRNA \n",
"7SK 7SK \n",
"A1BG A1BG \n",
"A1BG-AS1 A1BG-AS1 \n",
"A1CF A1CF \n",
"A2M A2M "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fdat_df <- data.frame(gene_name=rownames(Matrix), stringsAsFactors=F)\n",
"rownames(fdat_df) <- rownames(Matrix)\n",
"metadata_fdat_df <- data.frame(labelDescription= c(\"gene name\"), row.names=c(\"gene_name\"))\n",
"head(fdat_df)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "collaborative-inspiration",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"dataframe\">\n",
"<caption>A data.frame: 6 × 2</caption>\n",
"<thead>\n",
"\t<tr><th></th><th scope=col>sample_type</th><th scope=col>condition</th></tr>\n",
"\t<tr><th></th><th scope=col><chr></th><th scope=col><chr></th></tr>\n",
"</thead>\n",
"<tbody>\n",
"\t<tr><th scope=row>AD_004_lesional</th><td>AD_L </td><td>AD</td></tr>\n",
"\t<tr><th scope=row>AD_004_non-lesional</th><td>AD_NL</td><td>AD</td></tr>\n",
"\t<tr><th scope=row>AD_005_lesional</th><td>AD_L </td><td>AD</td></tr>\n",
"\t<tr><th scope=row>AD_005_non-lesional</th><td>AD_NL</td><td>AD</td></tr>\n",
"\t<tr><th scope=row>AD_006_lesional</th><td>AD_L </td><td>AD</td></tr>\n",
"\t<tr><th scope=row>AD_006_non-lesional</th><td>AD_NL</td><td>AD</td></tr>\n",
"</tbody>\n",
"</table>\n"
],
"text/latex": [
"A data.frame: 6 × 2\n",
"\\begin{tabular}{r|ll}\n",
" & sample\\_type & condition\\\\\n",
" & <chr> & <chr>\\\\\n",
"\\hline\n",
"\tAD\\_004\\_lesional & AD\\_L & AD\\\\\n",
"\tAD\\_004\\_non-lesional & AD\\_NL & AD\\\\\n",
"\tAD\\_005\\_lesional & AD\\_L & AD\\\\\n",
"\tAD\\_005\\_non-lesional & AD\\_NL & AD\\\\\n",
"\tAD\\_006\\_lesional & AD\\_L & AD\\\\\n",
"\tAD\\_006\\_non-lesional & AD\\_NL & AD\\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"A data.frame: 6 × 2\n",
"\n",
"| <!--/--> | sample_type <chr> | condition <chr> |\n",
"|---|---|---|\n",
"| AD_004_lesional | AD_L | AD |\n",
"| AD_004_non-lesional | AD_NL | AD |\n",
"| AD_005_lesional | AD_L | AD |\n",
"| AD_005_non-lesional | AD_NL | AD |\n",
"| AD_006_lesional | AD_L | AD |\n",
"| AD_006_non-lesional | AD_NL | AD |\n",
"\n"
],
"text/plain": [
" sample_type condition\n",
"AD_004_lesional AD_L AD \n",
"AD_004_non-lesional AD_NL AD \n",
"AD_005_lesional AD_L AD \n",
"AD_005_non-lesional AD_NL AD \n",
"AD_006_lesional AD_L AD \n",
"AD_006_non-lesional AD_NL AD "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"metadata <- read.table(\"/lustre/scratch117/cellgen/team205/rl20/CTCL/deconvolution/GSE121212_meta.xls\", \n",
" check.names=F, sep=\"\\t\", header = T)\n",
"head(metadata)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "declared-cookbook",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"dataframe\">\n",
"<caption>A data.frame: 6 × 2</caption>\n",
"<thead>\n",
"\t<tr><th></th><th scope=col>sample_type</th><th scope=col>condition</th></tr>\n",
"\t<tr><th></th><th scope=col><chr></th><th scope=col><chr></th></tr>\n",
"</thead>\n",
"<tbody>\n",
"\t<tr><th scope=row>AD_004_lesional</th><td>AD_L </td><td>AD</td></tr>\n",
"\t<tr><th scope=row>AD_004_non-lesional</th><td>AD_NL</td><td>AD</td></tr>\n",
"\t<tr><th scope=row>AD_005_lesional</th><td>AD_L </td><td>AD</td></tr>\n",
"\t<tr><th scope=row>AD_005_non-lesional</th><td>AD_NL</td><td>AD</td></tr>\n",
"\t<tr><th scope=row>AD_006_lesional</th><td>AD_L </td><td>AD</td></tr>\n",
"\t<tr><th scope=row>AD_006_non-lesional</th><td>AD_NL</td><td>AD</td></tr>\n",
"</tbody>\n",
"</table>\n"
],
"text/latex": [
"A data.frame: 6 × 2\n",
"\\begin{tabular}{r|ll}\n",
" & sample\\_type & condition\\\\\n",
" & <chr> & <chr>\\\\\n",
"\\hline\n",
"\tAD\\_004\\_lesional & AD\\_L & AD\\\\\n",
"\tAD\\_004\\_non-lesional & AD\\_NL & AD\\\\\n",
"\tAD\\_005\\_lesional & AD\\_L & AD\\\\\n",
"\tAD\\_005\\_non-lesional & AD\\_NL & AD\\\\\n",
"\tAD\\_006\\_lesional & AD\\_L & AD\\\\\n",
"\tAD\\_006\\_non-lesional & AD\\_NL & AD\\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"A data.frame: 6 × 2\n",
"\n",
"| <!--/--> | sample_type <chr> | condition <chr> |\n",
"|---|---|---|\n",
"| AD_004_lesional | AD_L | AD |\n",
"| AD_004_non-lesional | AD_NL | AD |\n",
"| AD_005_lesional | AD_L | AD |\n",
"| AD_005_non-lesional | AD_NL | AD |\n",
"| AD_006_lesional | AD_L | AD |\n",
"| AD_006_non-lesional | AD_NL | AD |\n",
"\n"
],
"text/plain": [
" sample_type condition\n",
"AD_004_lesional AD_L AD \n",
"AD_004_non-lesional AD_NL AD \n",
"AD_005_lesional AD_L AD \n",
"AD_005_non-lesional AD_NL AD \n",
"AD_006_lesional AD_L AD \n",
"AD_006_non-lesional AD_NL AD "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"pdat_df <- data.frame(sample_type = metadata$sample_type, \n",
" condition = metadata$condition,\n",
" stringsAsFactors = F)\n",
"rownames(pdat_df) <- rownames(metadata)\n",
"head(pdat_df)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "following-sleeve",
"metadata": {},
"outputs": [],
"source": [
"metadata_pdat_df <- data.frame(labelDescription= c(\"lesion_nonlesion\", \"Condition\"), \n",
" row.names=c(\"sample_type\", \"condition\"))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "racial-modem",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"dataframe\">\n",
"<caption>A data.frame: 2 × 1</caption>\n",
"<thead>\n",
"\t<tr><th></th><th scope=col>labelDescription</th></tr>\n",
"\t<tr><th></th><th scope=col><chr></th></tr>\n",
"</thead>\n",
"<tbody>\n",
"\t<tr><th scope=row>sample_type</th><td>lesion_nonlesion</td></tr>\n",
"\t<tr><th scope=row>condition</th><td>Condition </td></tr>\n",
"</tbody>\n",
"</table>\n"
],
"text/latex": [
"A data.frame: 2 × 1\n",
"\\begin{tabular}{r|l}\n",
" & labelDescription\\\\\n",
" & <chr>\\\\\n",
"\\hline\n",
"\tsample\\_type & lesion\\_nonlesion\\\\\n",
"\tcondition & Condition \\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"A data.frame: 2 × 1\n",
"\n",
"| <!--/--> | labelDescription <chr> |\n",
"|---|---|\n",
"| sample_type | lesion_nonlesion |\n",
"| condition | Condition |\n",
"\n"
],
"text/plain": [
" labelDescription\n",
"sample_type lesion_nonlesion\n",
"condition Condition "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"metadata_pdat_df"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "pursuant-switch",
"metadata": {},
"outputs": [],
"source": [
"Matrix <- Matrix[ ,rownames(pdat_df)] ### the order of rownames(pdat_df) and colnames(Matrix) might be different"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "comfortable-prague",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"TRUE"
],
"text/latex": [
"TRUE"
],
"text/markdown": [
"TRUE"
],
"text/plain": [
"[1] TRUE"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"all.equal(colnames(Matrix), rownames(pdat_df))"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "ruled-spencer",
"metadata": {},
"outputs": [],
"source": [
"bulk_eset <- ExpressionSet(\n",
" assayData = data.matrix(Matrix),\n",
" phenoData=new(\"AnnotatedDataFrame\", data = pdat_df, varMetadata = metadata_pdat_df),\n",
" featureData=new(\"AnnotatedDataFrame\", data = fdat_df, varMetadata = metadata_fdat_df))"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "renewable-mentor",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ExpressionSet (storageMode: lockedEnvironment)\n",
"assayData: 31362 features, 147 samples \n",
" element names: exprs \n",
"protocolData: none\n",
"phenoData\n",
" sampleNames: AD_004_lesional AD_004_non-lesional ...\n",
" AD_037_chronic_lesion (147 total)\n",
" varLabels: sample_type condition\n",
" varMetadata: labelDescription\n",
"featureData\n",
" featureNames: 5S_rRNA 7SK ... snoZ5 (31362 total)\n",
" fvarLabels: gene_name\n",
" fvarMetadata: labelDescription\n",
"experimentData: use 'experimentData(object)'\n",
"Annotation: "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"bulk_eset"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "personalized-power",
"metadata": {},
"outputs": [],
"source": [
"saveRDS(bulk_eset, file=\"/lustre/scratch117/cellgen/team205/rl20/CTCL/deconvolution/GSE121212_readcount_ExpressionSet.rds\")"
]
},
{
"cell_type": "markdown",
"id": "floral-undergraduate",
"metadata": {},
"source": [
"### Single-cell ExpressionSet"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "passive-sponsorship",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading required package: reticulate\n",
"\n",
"Attaching SeuratObject\n",
"\n",
"\n",
"Attaching package: ‘Seurat’\n",
"\n",
"\n",
"The following object is masked from ‘package:SummarizedExperiment’:\n",
"\n",
" Assays\n",
"\n",
"\n",
"\n",
"Attaching package: ‘Matrix’\n",
"\n",
"\n",
"The following object is masked from ‘package:S4Vectors’:\n",
"\n",
" expand\n",
"\n",
"\n"
]
}
],
"source": [
"library(sceasy)\n",
"library(reticulate)\n",
"library(anndata)\n",
"library(Seurat)\n",
"library(BisqueRNA)\n",
"library(Biobase)\n",
"library(Matrix)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "determined-permit",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Warning message in asMethod(object):\n",
"“sparse->dense coercion: allocating vector of size 7.3 GiB”\n"
]
}
],
"source": [
"h5ad_file <- \"/lustre/scratch126/cellgen/team205/rl20/CTCL/object_revision/All_samples_final_20240707_sub0.08_for_deconv.h5ad\"\n",
"sdata <- read_h5ad(h5ad_file)\n",
"seurat_object <- CreateSeuratObject(counts = t(as.matrix(sdata$X)), meta.data = sdata$obs)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "criminal-manor",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"An object of class Seurat \n",
"15777 features across 62199 samples within 1 assay \n",
"Active assay: RNA (15777 features, 0 variable features)"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"seurat_object"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "beginning-tongue",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" [[ suppressing 20 column names ‘AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1’, ‘AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1’, ‘AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1’ ... ]]\n",
"\n"
]
},
{
"data": {
"text/plain": [
"20 x 20 sparse Matrix of class \"dgCMatrix\"\n",
" \n",
"SAMD11 . . . . . . . . . . . . . . . . . . . .\n",
"NOC2L 1 1 . . 2 . . 1 . . . 3 1 . . . . . . .\n",
"KLHL17 . . . . . . . . . . . . . . . . . . . .\n",
"PLEKHN1 2 . . . . . . . 1 . . 1 . . . . . . . .\n",
"HES4 . . 1 . . . . . . . 2 . . . . . 1 . . .\n",
"ISG15 . . . . . . . 1 . . . . . . . . . . . .\n",
"AGRN . . . . . . . . . 1 . . . . 1 . . . . .\n",
"RNF223 . . . . . . . . . . . . . . . . . . . .\n",
"C1orf159 . . . . . . . . . . 1 . . . . . . . . .\n",
"TTLL10 . . . . . . . . . . . . . . . . . . . .\n",
"TNFRSF18 3 1 1 . 1 . . 5 . . . 1 3 . . 1 . . . .\n",
"TNFRSF4 . . . . . . . 3 . 1 . . . . . . . . . .\n",
"SDF4 6 1 5 1 1 3 . 2 . . . 3 . 2 2 1 1 . . .\n",
"B3GALT6 . . . . . . . . . . . . . . . . . . . .\n",
"C1QTNF12 . . 1 . . . . . . . . . . . 1 . . . 1 .\n",
"UBE2J2 . 1 . . 3 . . 1 . . 4 1 . 1 . . 2 . . .\n",
"SCNN1D . . 1 . . . . . . . . . . . . . . . . .\n",
"ACAP3 . . . . . 1 . . . . . . . . . . . . . .\n",
"PUSL1 . . . . . . . . 1 . . . . . . . . . . .\n",
"INTS11 3 . . . . 1 . . . . 2 . . 2 1 1 . . . ."
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"gene_exprs.matrix <- seurat_object@assays$RNA@counts\n",
"gene_exprs.matrix[1:20,1:20]\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "marine-philosophy",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"dataframe\">\n",
"<caption>A data.frame: 6 × 22</caption>\n",
"<thead>\n",
"\t<tr><th></th><th scope=col>orig.ident</th><th scope=col>nCount_RNA</th><th scope=col>nFeature_RNA</th><th scope=col>batch</th><th scope=col>n_counts</th><th scope=col>n_genes</th><th scope=col>percent_mito</th><th scope=col>donor</th><th scope=col>tech</th><th scope=col>sample_type</th><th scope=col>⋯</th><th scope=col>broad_ct</th><th scope=col>ctpredicted_labels</th><th scope=col>ctover_clustering</th><th scope=col>ctmajority_voting</th><th scope=col>ctconf_score</th><th scope=col>cell_type</th><th scope=col>site</th><th scope=col>tissue</th><th scope=col>Sex</th><th scope=col>type</th></tr>\n",
"\t<tr><th></th><th scope=col><fct></th><th scope=col><dbl></th><th scope=col><int></th><th scope=col><fct></th><th scope=col><dbl></th><th scope=col><dbl></th><th scope=col><dbl></th><th scope=col><fct></th><th scope=col><fct></th><th scope=col><fct></th><th scope=col>⋯</th><th scope=col><fct></th><th scope=col><fct></th><th scope=col><fct></th><th scope=col><fct></th><th scope=col><dbl></th><th scope=col><fct></th><th scope=col><fct></th><th scope=col><fct></th><th scope=col><fct></th><th scope=col><fct></th></tr>\n",
"</thead>\n",
"<tbody>\n",
"\t<tr><th scope=row>AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1</th><td>SeuratProject</td><td>14602</td><td>3142</td><td>0</td><td>27577</td><td>3474</td><td>0.0011241252</td><td>CTCL1</td><td>10x</td><td>CTCL</td><td>⋯</td><td>Keratinocytes</td><td>Differentiated_KC*</td><td>286</td><td>Differentiated_KC </td><td>0.96744886</td><td>Differentiated_KC </td><td>lesion</td><td>Epidermis</td><td>Female</td><td>NA</td></tr>\n",
"\t<tr><th scope=row>AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1</th><td>SeuratProject</td><td>13463</td><td>3311</td><td>0</td><td>21021</td><td>3642</td><td>0.0004281433</td><td>CTCL1</td><td>10x</td><td>CTCL</td><td>⋯</td><td>Keratinocytes</td><td>Differentiated_KC*</td><td>492</td><td>Differentiated_KC*</td><td>0.13851590</td><td>Differentiated_KC*</td><td>lesion</td><td>Epidermis</td><td>Female</td><td>NA</td></tr>\n",
"\t<tr><th scope=row>AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1</th><td>SeuratProject</td><td>14551</td><td>3472</td><td>0</td><td>27837</td><td>3824</td><td>0.0024787153</td><td>CTCL1</td><td>10x</td><td>CTCL</td><td>⋯</td><td>Keratinocytes</td><td>Differentiated_KC*</td><td>535</td><td>Differentiated_KC </td><td>0.01321663</td><td>Differentiated_KC </td><td>lesion</td><td>Epidermis</td><td>Female</td><td>NA</td></tr>\n",
"\t<tr><th scope=row>AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1</th><td>SeuratProject</td><td>11215</td><td>2632</td><td>0</td><td>17545</td><td>2943</td><td>0.0056426334</td><td>CTCL1</td><td>10x</td><td>CTCL</td><td>⋯</td><td>Keratinocytes</td><td>Differentiated_KC*</td><td>298</td><td>Differentiated_KC*</td><td>0.90835925</td><td>Differentiated_KC*</td><td>lesion</td><td>Epidermis</td><td>Female</td><td>NA</td></tr>\n",
"\t<tr><th scope=row>AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1</th><td>SeuratProject</td><td> 6150</td><td>2384</td><td>0</td><td> 9136</td><td>2665</td><td>0.0010945710</td><td>CTCL1</td><td>10x</td><td>CTCL</td><td>⋯</td><td>Keratinocytes</td><td>VE2 </td><td>368</td><td>Differentiated_KC*</td><td>0.04688790</td><td>Differentiated_KC*</td><td>lesion</td><td>Epidermis</td><td>Female</td><td>NA</td></tr>\n",
"\t<tr><th scope=row>AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1</th><td>SeuratProject</td><td> 5296</td><td>1860</td><td>0</td><td>11803</td><td>2101</td><td>0.0155045325</td><td>CTCL1</td><td>10x</td><td>CTCL</td><td>⋯</td><td>Keratinocytes</td><td>Differentiated_KC </td><td>438</td><td>Differentiated_KC </td><td>0.67155350</td><td>Differentiated_KC </td><td>lesion</td><td>Epidermis</td><td>Female</td><td>NA</td></tr>\n",
"</tbody>\n",
"</table>\n"
],
"text/latex": [
"A data.frame: 6 × 22\n",
"\\begin{tabular}{r|lllllllllllllllllllll}\n",
" & orig.ident & nCount\\_RNA & nFeature\\_RNA & batch & n\\_counts & n\\_genes & percent\\_mito & donor & tech & sample\\_type & ⋯ & broad\\_ct & ctpredicted\\_labels & ctover\\_clustering & ctmajority\\_voting & ctconf\\_score & cell\\_type & site & tissue & Sex & type\\\\\n",
" & <fct> & <dbl> & <int> & <fct> & <dbl> & <dbl> & <dbl> & <fct> & <fct> & <fct> & ⋯ & <fct> & <fct> & <fct> & <fct> & <dbl> & <fct> & <fct> & <fct> & <fct> & <fct>\\\\\n",
"\\hline\n",
"\tAAACCTGCATCACAAC-0\\_CTCL1\\_CTCL1\\_CTCL1 & SeuratProject & 14602 & 3142 & 0 & 27577 & 3474 & 0.0011241252 & CTCL1 & 10x & CTCL & ⋯ & Keratinocytes & Differentiated\\_KC* & 286 & Differentiated\\_KC & 0.96744886 & Differentiated\\_KC & lesion & Epidermis & Female & NA\\\\\n",
"\tAAACCTGCATGTAAGA-0\\_CTCL1\\_CTCL1\\_CTCL1 & SeuratProject & 13463 & 3311 & 0 & 21021 & 3642 & 0.0004281433 & CTCL1 & 10x & CTCL & ⋯ & Keratinocytes & Differentiated\\_KC* & 492 & Differentiated\\_KC* & 0.13851590 & Differentiated\\_KC* & lesion & Epidermis & Female & NA\\\\\n",
"\tAAACGGGGTCGACTGC-0\\_CTCL1\\_CTCL1\\_CTCL1 & SeuratProject & 14551 & 3472 & 0 & 27837 & 3824 & 0.0024787153 & CTCL1 & 10x & CTCL & ⋯ & Keratinocytes & Differentiated\\_KC* & 535 & Differentiated\\_KC & 0.01321663 & Differentiated\\_KC & lesion & Epidermis & Female & NA\\\\\n",
"\tAAAGCAAAGCGTAATA-0\\_CTCL1\\_CTCL1\\_CTCL1 & SeuratProject & 11215 & 2632 & 0 & 17545 & 2943 & 0.0056426334 & CTCL1 & 10x & CTCL & ⋯ & Keratinocytes & Differentiated\\_KC* & 298 & Differentiated\\_KC* & 0.90835925 & Differentiated\\_KC* & lesion & Epidermis & Female & NA\\\\\n",
"\tAAAGCAAGTCCTGCTT-0\\_CTCL1\\_CTCL1\\_CTCL1 & SeuratProject & 6150 & 2384 & 0 & 9136 & 2665 & 0.0010945710 & CTCL1 & 10x & CTCL & ⋯ & Keratinocytes & VE2 & 368 & Differentiated\\_KC* & 0.04688790 & Differentiated\\_KC* & lesion & Epidermis & Female & NA\\\\\n",
"\tAAAGCAATCCCATTTA-0\\_CTCL1\\_CTCL1\\_CTCL1 & SeuratProject & 5296 & 1860 & 0 & 11803 & 2101 & 0.0155045325 & CTCL1 & 10x & CTCL & ⋯ & Keratinocytes & Differentiated\\_KC & 438 & Differentiated\\_KC & 0.67155350 & Differentiated\\_KC & lesion & Epidermis & Female & NA\\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"A data.frame: 6 × 22\n",
"\n",
"| <!--/--> | orig.ident <fct> | nCount_RNA <dbl> | nFeature_RNA <int> | batch <fct> | n_counts <dbl> | n_genes <dbl> | percent_mito <dbl> | donor <fct> | tech <fct> | sample_type <fct> | ⋯ ⋯ | broad_ct <fct> | ctpredicted_labels <fct> | ctover_clustering <fct> | ctmajority_voting <fct> | ctconf_score <dbl> | cell_type <fct> | site <fct> | tissue <fct> | Sex <fct> | type <fct> |\n",
"|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
"| AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 | SeuratProject | 14602 | 3142 | 0 | 27577 | 3474 | 0.0011241252 | CTCL1 | 10x | CTCL | ⋯ | Keratinocytes | Differentiated_KC* | 286 | Differentiated_KC | 0.96744886 | Differentiated_KC | lesion | Epidermis | Female | NA |\n",
"| AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 | SeuratProject | 13463 | 3311 | 0 | 21021 | 3642 | 0.0004281433 | CTCL1 | 10x | CTCL | ⋯ | Keratinocytes | Differentiated_KC* | 492 | Differentiated_KC* | 0.13851590 | Differentiated_KC* | lesion | Epidermis | Female | NA |\n",
"| AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 | SeuratProject | 14551 | 3472 | 0 | 27837 | 3824 | 0.0024787153 | CTCL1 | 10x | CTCL | ⋯ | Keratinocytes | Differentiated_KC* | 535 | Differentiated_KC | 0.01321663 | Differentiated_KC | lesion | Epidermis | Female | NA |\n",
"| AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 | SeuratProject | 11215 | 2632 | 0 | 17545 | 2943 | 0.0056426334 | CTCL1 | 10x | CTCL | ⋯ | Keratinocytes | Differentiated_KC* | 298 | Differentiated_KC* | 0.90835925 | Differentiated_KC* | lesion | Epidermis | Female | NA |\n",
"| AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 | SeuratProject | 6150 | 2384 | 0 | 9136 | 2665 | 0.0010945710 | CTCL1 | 10x | CTCL | ⋯ | Keratinocytes | VE2 | 368 | Differentiated_KC* | 0.04688790 | Differentiated_KC* | lesion | Epidermis | Female | NA |\n",
"| AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 | SeuratProject | 5296 | 1860 | 0 | 11803 | 2101 | 0.0155045325 | CTCL1 | 10x | CTCL | ⋯ | Keratinocytes | Differentiated_KC | 438 | Differentiated_KC | 0.67155350 | Differentiated_KC | lesion | Epidermis | Female | NA |\n",
"\n"
],
"text/plain": [
" orig.ident nCount_RNA nFeature_RNA\n",
"AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 SeuratProject 14602 3142 \n",
"AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 SeuratProject 13463 3311 \n",
"AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 SeuratProject 14551 3472 \n",
"AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 SeuratProject 11215 2632 \n",
"AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 SeuratProject 6150 2384 \n",
"AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 SeuratProject 5296 1860 \n",
" batch n_counts n_genes percent_mito donor\n",
"AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 0 27577 3474 0.0011241252 CTCL1\n",
"AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 0 21021 3642 0.0004281433 CTCL1\n",
"AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 0 27837 3824 0.0024787153 CTCL1\n",
"AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 0 17545 2943 0.0056426334 CTCL1\n",
"AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 0 9136 2665 0.0010945710 CTCL1\n",
"AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 0 11803 2101 0.0155045325 CTCL1\n",
" tech sample_type ⋯ broad_ct \n",
"AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 10x CTCL ⋯ Keratinocytes\n",
"AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 10x CTCL ⋯ Keratinocytes\n",
"AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 10x CTCL ⋯ Keratinocytes\n",
"AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 10x CTCL ⋯ Keratinocytes\n",
"AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 10x CTCL ⋯ Keratinocytes\n",
"AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 10x CTCL ⋯ Keratinocytes\n",
" ctpredicted_labels ctover_clustering\n",
"AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 286 \n",
"AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 492 \n",
"AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 535 \n",
"AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 298 \n",
"AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 VE2 368 \n",
"AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC 438 \n",
" ctmajority_voting ctconf_score\n",
"AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 Differentiated_KC 0.96744886 \n",
"AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 0.13851590 \n",
"AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 Differentiated_KC 0.01321663 \n",
"AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 0.90835925 \n",
"AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 0.04688790 \n",
"AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC 0.67155350 \n",
" cell_type site tissue Sex \n",
"AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 Differentiated_KC lesion Epidermis Female\n",
"AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* lesion Epidermis Female\n",
"AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 Differentiated_KC lesion Epidermis Female\n",
"AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* lesion Epidermis Female\n",
"AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* lesion Epidermis Female\n",
"AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC lesion Epidermis Female\n",
" type\n",
"AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 NA \n",
"AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 NA \n",
"AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 NA \n",
"AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 NA \n",
"AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 NA \n",
"AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 NA "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"head(seurat_object@meta.data)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "sacred-retreat",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"dataframe\">\n",
"<caption>A data.frame: 6 × 2</caption>\n",
"<thead>\n",
"\t<tr><th></th><th scope=col>donor</th><th scope=col>cell_type</th></tr>\n",
"\t<tr><th></th><th scope=col><fct></th><th scope=col><fct></th></tr>\n",
"</thead>\n",
"<tbody>\n",
"\t<tr><th scope=row>AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1</th><td>CTCL1</td><td>Differentiated_KC </td></tr>\n",
"\t<tr><th scope=row>AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1</th><td>CTCL1</td><td>Differentiated_KC*</td></tr>\n",
"\t<tr><th scope=row>AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1</th><td>CTCL1</td><td>Differentiated_KC </td></tr>\n",
"\t<tr><th scope=row>AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1</th><td>CTCL1</td><td>Differentiated_KC*</td></tr>\n",
"\t<tr><th scope=row>AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1</th><td>CTCL1</td><td>Differentiated_KC*</td></tr>\n",
"\t<tr><th scope=row>AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1</th><td>CTCL1</td><td>Differentiated_KC </td></tr>\n",
"</tbody>\n",
"</table>\n"
],
"text/latex": [
"A data.frame: 6 × 2\n",
"\\begin{tabular}{r|ll}\n",
" & donor & cell\\_type\\\\\n",
" & <fct> & <fct>\\\\\n",
"\\hline\n",
"\tAAACCTGCATCACAAC-0\\_CTCL1\\_CTCL1\\_CTCL1 & CTCL1 & Differentiated\\_KC \\\\\n",
"\tAAACCTGCATGTAAGA-0\\_CTCL1\\_CTCL1\\_CTCL1 & CTCL1 & Differentiated\\_KC*\\\\\n",
"\tAAACGGGGTCGACTGC-0\\_CTCL1\\_CTCL1\\_CTCL1 & CTCL1 & Differentiated\\_KC \\\\\n",
"\tAAAGCAAAGCGTAATA-0\\_CTCL1\\_CTCL1\\_CTCL1 & CTCL1 & Differentiated\\_KC*\\\\\n",
"\tAAAGCAAGTCCTGCTT-0\\_CTCL1\\_CTCL1\\_CTCL1 & CTCL1 & Differentiated\\_KC*\\\\\n",
"\tAAAGCAATCCCATTTA-0\\_CTCL1\\_CTCL1\\_CTCL1 & CTCL1 & Differentiated\\_KC \\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"A data.frame: 6 × 2\n",
"\n",
"| <!--/--> | donor <fct> | cell_type <fct> |\n",
"|---|---|---|\n",
"| AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 | CTCL1 | Differentiated_KC |\n",
"| AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 | CTCL1 | Differentiated_KC* |\n",
"| AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 | CTCL1 | Differentiated_KC |\n",
"| AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 | CTCL1 | Differentiated_KC* |\n",
"| AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 | CTCL1 | Differentiated_KC* |\n",
"| AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 | CTCL1 | Differentiated_KC |\n",
"\n"
],
"text/plain": [
" donor cell_type \n",
"AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 CTCL1 Differentiated_KC \n",
"AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 CTCL1 Differentiated_KC*\n",
"AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 CTCL1 Differentiated_KC \n",
"AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 CTCL1 Differentiated_KC*\n",
"AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 CTCL1 Differentiated_KC*\n",
"AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 CTCL1 Differentiated_KC "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Per-cell phenotype table: keep only the donor and cell-type annotation columns.\n",
"# Column set used previously (kept for reference):\n",
"#   c(\"donor_id\", \"broad_cell_type\", \"anno\")\n",
"pheno.matrix <- seurat_object@meta.data[c(\"donor\", \"cell_type\")]\n",
"head(pheno.matrix)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "surface-nevada",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"dataframe\">\n",
"<caption>A data.frame: 2 × 1</caption>\n",
"<thead>\n",
"\t<tr><th></th><th scope=col>labelDescription</th></tr>\n",
"\t<tr><th></th><th scope=col><chr></th></tr>\n",
"</thead>\n",
"<tbody>\n",
"\t<tr><th scope=row>donor</th><td>Donor ID </td></tr>\n",
"\t<tr><th scope=row>cell_type</th><td>Cell type</td></tr>\n",
"</tbody>\n",
"</table>\n"
],
"text/latex": [
"A data.frame: 2 × 1\n",
"\\begin{tabular}{r|l}\n",
" & labelDescription\\\\\n",
" & <chr>\\\\\n",
"\\hline\n",
"\tdonor & Donor ID \\\\\n",
"\tcell\\_type & Cell type\\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"A data.frame: 2 × 1\n",
"\n",
"| <!--/--> | labelDescription <chr> |\n",
"|---|---|\n",
"| donor | Donor ID |\n",
"| cell_type | Cell type |\n",
"\n"
],
"text/plain": [
" labelDescription\n",
"donor Donor ID \n",
"cell_type Cell type "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# One human-readable description per phenotype column; the row names mirror the\n",
"# pheno.matrix column names (donor, cell_type).\n",
"metadata <- data.frame(\n",
"  labelDescription = c(\"Donor ID\", \"Cell type\"),\n",
"  row.names = c(\"donor\", \"cell_type\")\n",
")\n",
"metadata"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "deadly-venture",
"metadata": {},
"outputs": [],
"source": [
"library(Matrix)\n",
"#expr_matrix <- integrated@assays$RNA@counts #sparsematrix\n",
"# Remove every feature (row) that has no stored count in any cell.\n",
"# summary() on the sparse matrix lists its stored entries; `i` is the row index.\n",
"# nbins = nrow(...) forces one bin per row: by default tabulate() stops at the\n",
"# largest index it sees, so the mask would be too short whenever the last rows\n",
"# of the matrix are empty.\n",
"entries_per_feature <- tabulate(summary(gene_exprs.matrix)$i, nbins = nrow(gene_exprs.matrix))\n",
"gene_exprs.matrix <- gene_exprs.matrix[entries_per_feature != 0, , drop = FALSE]\n",
"#expr_matrix <- as.matrix(gene_exprs.matrix) #convert to matrix"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "medical-settlement",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>\n",
".list-inline {list-style: none; margin:0; padding: 0}\n",
".list-inline>li {display: inline-block}\n",
".list-inline>li:not(:last-child)::after {content: \"\\00b7\"; padding: 0 .5ex}\n",
"</style>\n",
"<ol class=list-inline><li>15767</li><li>62199</li></ol>\n"
],
"text/latex": [
"\\begin{enumerate*}\n",
"\\item 15767\n",
"\\item 62199\n",
"\\end{enumerate*}\n"
],
"text/markdown": [
"1. 15767\n",
"2. 62199\n",
"\n",
"\n"
],
"text/plain": [
"[1] 15767 62199"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"dim(gene_exprs.matrix)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "dated-injection",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"n_slices=1\n",
"\n",
"converting slice 1/1\n",
"\n",
"columns 1:55815\n",
"\n",
"cbind dense submatrices\n",
"\n"
]
}
],
"source": [
"# Dense copy of the expression matrix built slice by slice with SCOPfunctions;\n",
"# intended for matrices too large for a single sparse->dense coercion.\n",
"# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.\n",
"expr_matrix_den <- SCOPfunctions::utils_big_as.matrix(gene_exprs.matrix, n_slices_init = 1, verbose = TRUE)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "conscious-terrorist",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Warning message in asMethod(object):\n",
"“sparse->dense coercion: allocating vector of size 7.3 GiB”\n"
]
}
],
"source": [
"# Build the single-cell ExpressionSet: expression values as a dense matrix plus\n",
"# per-cell phenotype data (pheno.matrix) described by the labels in metadata.\n",
"# as.matrix() coerces the whole sparse matrix in one step, which allocates\n",
"# several GiB for this data set.\n",
"SC.eset <- ExpressionSet(\n",
"  assayData = as.matrix(gene_exprs.matrix),\n",
"  phenoData = new(\"AnnotatedDataFrame\", data = pheno.matrix, varMetadata = metadata)\n",
")\n",
"# Variant for very large matrices, using the pre-densified expr_matrix_den:\n",
"# SC.eset <- ExpressionSet(\n",
"#   assayData = expr_matrix_den,\n",
"#   phenoData = new(\"AnnotatedDataFrame\", data = pheno.matrix, varMetadata = metadata)\n",
"# )"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "final-hardwood",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ExpressionSet (storageMode: lockedEnvironment)\n",
"assayData: 15767 features, 62199 samples \n",
" element names: exprs \n",
"protocolData: none\n",
"phenoData\n",
" sampleNames: AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1\n",
" AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 ... GACGTGCTCACATACG-92_S5\n",
" (62199 total)\n",
" varLabels: donor cell_type\n",
" varMetadata: labelDescription\n",
"featureData: none\n",
"experimentData: use 'experimentData(object)'\n",
"Annotation: "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"SC.eset"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "minimal-raising",
"metadata": {},
"outputs": [],
"source": [
"# Persist the ExpressionSet to disk as an RDS file.\n",
"saveRDS(\n",
"  object = SC.eset,\n",
"  file = \"/lustre/scratch126/cellgen/team205/rl20/CTCL/object_revision/All_samples_final_20240707_sub0.08_for_deconv_ExpressionSet.rds\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "enormous-helping",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "R",
"language": "R",
"name": "ir"
},
"language_info": {
"codemirror_mode": "r",
"file_extension": ".r",
"mimetype": "text/x-r-source",
"name": "R",
"pygments_lexer": "r",
"version": "4.0.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}