{ "cells": [ { "cell_type": "markdown", "id": "single-replacement", "metadata": {}, "source": [ "### bulk ExpressionSet" ] }, { "cell_type": "code", "execution_count": 1, "id": "abroad-circle", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Loading required package: GenomicRanges\n", "\n", "Loading required package: stats4\n", "\n", "Loading required package: BiocGenerics\n", "\n", "Loading required package: parallel\n", "\n", "\n", "Attaching package: ‘BiocGenerics’\n", "\n", "\n", "The following objects are masked from ‘package:parallel’:\n", "\n", " clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,\n", " clusterExport, clusterMap, parApply, parCapply, parLapply,\n", " parLapplyLB, parRapply, parSapply, parSapplyLB\n", "\n", "\n", "The following objects are masked from ‘package:stats’:\n", "\n", " IQR, mad, sd, var, xtabs\n", "\n", "\n", "The following objects are masked from ‘package:base’:\n", "\n", " anyDuplicated, append, as.data.frame, basename, cbind, colnames,\n", " dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,\n", " grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,\n", " order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,\n", " rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,\n", " union, unique, unsplit, which, which.max, which.min\n", "\n", "\n", "Loading required package: S4Vectors\n", "\n", "\n", "Attaching package: ‘S4Vectors’\n", "\n", "\n", "The following object is masked from ‘package:base’:\n", "\n", " expand.grid\n", "\n", "\n", "Loading required package: IRanges\n", "\n", "Loading required package: GenomeInfoDb\n", "\n", "Loading required package: Biobase\n", "\n", "Welcome to Bioconductor\n", "\n", " Vignettes contain introductory material; view with\n", " 'browseVignettes()'. 
To cite Bioconductor, see\n", " 'citation(\"Biobase\")', and for packages 'citation(\"pkgname\")'.\n", "\n", "\n", "Loading required package: DelayedArray\n", "\n", "Loading required package: matrixStats\n", "\n", "\n", "Attaching package: ‘matrixStats’\n", "\n", "\n", "The following objects are masked from ‘package:Biobase’:\n", "\n", " anyMissing, rowMedians\n", "\n", "\n", "\n", "Attaching package: ‘DelayedArray’\n", "\n", "\n", "The following objects are masked from ‘package:matrixStats’:\n", "\n", " colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges\n", "\n", "\n", "The following objects are masked from ‘package:base’:\n", "\n", " aperm, apply, rowsum\n", "\n", "\n" ] } ], "source": [ "library(SummarizedExperiment)" ] }, { "cell_type": "code", "execution_count": 2, "id": "experimental-disability", "metadata": {}, "outputs": [], "source": [ "# Raw read counts for GSE121212 (rows = genes, columns = samples).\n", "# check.names = FALSE keeps sample names such as \"AD_004_non-lesional\" unmangled.\n", "Matrix <- read.table(\"/lustre/scratch117/cellgen/team205/rl20/CTCL/deconvolution/GSE121212_readcount_rmdup.txt\",\n", "                     check.names = FALSE)" ] }, { "cell_type": "code", "execution_count": 3, "id": "graduate-mining", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
A data.frame: 6 × 147
AD_004_lesionalAD_004_non-lesionalAD_005_lesionalAD_005_non-lesionalAD_006_lesionalAD_006_non-lesionalAD_007_lesionalAD_007_non-lesionalAD_009_lesionalAD_009_non-lesionalAD_033_chronic_lesionAD_034_non-lesionalAD_034_chronic_lesionAD_032_chronic_lesionAD_035_non-lesionalAD_035_chronic_lesionAD_036_non-lesionalAD_036_chronic_lesionAD_037_non-lesionalAD_037_chronic_lesion
<int><int><int><int><int><int><int><int><int><int><int><int><int><int><int><int><int><int><int><int>
5S_rRNA 0 3 9 12 2 3 6 4 2 5 1 2 3 3 3 4 3 5 6 2
7SK 9 6 25 25 7 8 9 8 6 15 3 8 2 5 12 5 7 5 8 3
A1BG 2 0 4 3 1 1 0 0 4 0 1 1 2 0 1 2 0 0 1 1
A1BG-AS1 5 0 11 8 7 8 13 2 6 19 4 0 7 7 4 4 10 13 8 4
A1CF 2 3 2 8 1 2 0 5 1 5 3 0 1 0 0 2 0 0 1 0
A2M30835325441366978406276192827961891146313911714199868311542361237410402755
\n" ], "text/latex": [ "A data.frame: 6 × 147\n", "\\begin{tabular}{r|lllllllllllllllllllll}\n", " & AD\\_004\\_lesional & AD\\_004\\_non-lesional & AD\\_005\\_lesional & AD\\_005\\_non-lesional & AD\\_006\\_lesional & AD\\_006\\_non-lesional & AD\\_007\\_lesional & AD\\_007\\_non-lesional & AD\\_009\\_lesional & AD\\_009\\_non-lesional & ⋯ & AD\\_033\\_chronic\\_lesion & AD\\_034\\_non-lesional & AD\\_034\\_chronic\\_lesion & AD\\_032\\_chronic\\_lesion & AD\\_035\\_non-lesional & AD\\_035\\_chronic\\_lesion & AD\\_036\\_non-lesional & AD\\_036\\_chronic\\_lesion & AD\\_037\\_non-lesional & AD\\_037\\_chronic\\_lesion\\\\\n", " & & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", "\\hline\n", "\t5S\\_rRNA & 0 & 3 & 9 & 12 & 2 & 3 & 6 & 4 & 2 & 5 & ⋯ & 1 & 2 & 3 & 3 & 3 & 4 & 3 & 5 & 6 & 2\\\\\n", "\t7SK & 9 & 6 & 25 & 25 & 7 & 8 & 9 & 8 & 6 & 15 & ⋯ & 3 & 8 & 2 & 5 & 12 & 5 & 7 & 5 & 8 & 3\\\\\n", "\tA1BG & 2 & 0 & 4 & 3 & 1 & 1 & 0 & 0 & 4 & 0 & ⋯ & 1 & 1 & 2 & 0 & 1 & 2 & 0 & 0 & 1 & 1\\\\\n", "\tA1BG-AS1 & 5 & 0 & 11 & 8 & 7 & 8 & 13 & 2 & 6 & 19 & ⋯ & 4 & 0 & 7 & 7 & 4 & 4 & 10 & 13 & 8 & 4\\\\\n", "\tA1CF & 2 & 3 & 2 & 8 & 1 & 2 & 0 & 5 & 1 & 5 & ⋯ & 3 & 0 & 1 & 0 & 0 & 2 & 0 & 0 & 1 & 0\\\\\n", "\tA2M & 308 & 353 & 2544 & 1366 & 978 & 406 & 2761 & 928 & 2796 & 1891 & ⋯ & 1463 & 1391 & 1714 & 1998 & 683 & 1154 & 2361 & 2374 & 1040 & 2755\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A data.frame: 6 × 147\n", "\n", "| | AD_004_lesional <int> | AD_004_non-lesional <int> | AD_005_lesional <int> | AD_005_non-lesional <int> | AD_006_lesional <int> | AD_006_non-lesional <int> | AD_007_lesional <int> | AD_007_non-lesional <int> | AD_009_lesional <int> | AD_009_non-lesional <int> | ⋯ ⋯ | AD_033_chronic_lesion <int> | AD_034_non-lesional <int> | AD_034_chronic_lesion <int> | AD_032_chronic_lesion <int> | AD_035_non-lesional <int> | AD_035_chronic_lesion <int> | AD_036_non-lesional <int> | AD_036_chronic_lesion <int> | AD_037_non-lesional <int> | 
AD_037_chronic_lesion <int> |\n", "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", "| 5S_rRNA | 0 | 3 | 9 | 12 | 2 | 3 | 6 | 4 | 2 | 5 | ⋯ | 1 | 2 | 3 | 3 | 3 | 4 | 3 | 5 | 6 | 2 |\n", "| 7SK | 9 | 6 | 25 | 25 | 7 | 8 | 9 | 8 | 6 | 15 | ⋯ | 3 | 8 | 2 | 5 | 12 | 5 | 7 | 5 | 8 | 3 |\n", "| A1BG | 2 | 0 | 4 | 3 | 1 | 1 | 0 | 0 | 4 | 0 | ⋯ | 1 | 1 | 2 | 0 | 1 | 2 | 0 | 0 | 1 | 1 |\n", "| A1BG-AS1 | 5 | 0 | 11 | 8 | 7 | 8 | 13 | 2 | 6 | 19 | ⋯ | 4 | 0 | 7 | 7 | 4 | 4 | 10 | 13 | 8 | 4 |\n", "| A1CF | 2 | 3 | 2 | 8 | 1 | 2 | 0 | 5 | 1 | 5 | ⋯ | 3 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 1 | 0 |\n", "| A2M | 308 | 353 | 2544 | 1366 | 978 | 406 | 2761 | 928 | 2796 | 1891 | ⋯ | 1463 | 1391 | 1714 | 1998 | 683 | 1154 | 2361 | 2374 | 1040 | 2755 |\n", "\n" ], "text/plain": [ " AD_004_lesional AD_004_non-lesional AD_005_lesional\n", "5S_rRNA 0 3 9 \n", "7SK 9 6 25 \n", "A1BG 2 0 4 \n", "A1BG-AS1 5 0 11 \n", "A1CF 2 3 2 \n", "A2M 308 353 2544 \n", " AD_005_non-lesional AD_006_lesional AD_006_non-lesional\n", "5S_rRNA 12 2 3 \n", "7SK 25 7 8 \n", "A1BG 3 1 1 \n", "A1BG-AS1 8 7 8 \n", "A1CF 8 1 2 \n", "A2M 1366 978 406 \n", " AD_007_lesional AD_007_non-lesional AD_009_lesional\n", "5S_rRNA 6 4 2 \n", "7SK 9 8 6 \n", "A1BG 0 0 4 \n", "A1BG-AS1 13 2 6 \n", "A1CF 0 5 1 \n", "A2M 2761 928 2796 \n", " AD_009_non-lesional ⋯ AD_033_chronic_lesion AD_034_non-lesional\n", "5S_rRNA 5 ⋯ 1 2 \n", "7SK 15 ⋯ 3 8 \n", "A1BG 0 ⋯ 1 1 \n", "A1BG-AS1 19 ⋯ 4 0 \n", "A1CF 5 ⋯ 3 0 \n", "A2M 1891 ⋯ 1463 1391 \n", " AD_034_chronic_lesion AD_032_chronic_lesion AD_035_non-lesional\n", "5S_rRNA 3 3 3 \n", "7SK 2 5 12 \n", "A1BG 2 0 1 \n", "A1BG-AS1 7 7 4 \n", "A1CF 1 0 0 \n", "A2M 1714 1998 683 \n", " AD_035_chronic_lesion AD_036_non-lesional AD_036_chronic_lesion\n", "5S_rRNA 4 3 5 \n", "7SK 5 7 5 \n", "A1BG 2 0 0 \n", "A1BG-AS1 4 10 13 \n", "A1CF 2 0 0 \n", "A2M 1154 2361 2374 \n", " AD_037_non-lesional AD_037_chronic_lesion\n", "5S_rRNA 6 2 \n", "7SK 8 3 \n", "A1BG 1 1 
\n", "A1BG-AS1 8 4 \n", "A1CF 1 0 \n", "A2M 1040 2755 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
  1. 31362
  2. 147
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 31362\n", "\\item 147\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 31362\n", "2. 147\n", "\n", "\n" ], "text/plain": [ "[1] 31362 147" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Preview the first rows of the count table, then report its dimensions.\n", "head(Matrix)\n", "dim(Matrix)" ] }, { "cell_type": "code", "execution_count": 4, "id": "jewish-sessions", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
  1. 31362
  2. 147
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 31362\n", "\\item 147\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 31362\n", "2. 147\n", "\n", "\n" ], "text/plain": [ "[1] 31362 147" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Keep only the first occurrence of every gene name.\n", "is_genename_uniq <- !duplicated(rownames(Matrix))\n", "Matrix <- Matrix[is_genename_uniq, ]\n", "dim(Matrix)" ] }, { "cell_type": "code", "execution_count": 5, "id": "professional-pattern", "metadata": {}, "outputs": [ { "data": { "text/html": [], "text/latex": [], "text/markdown": [], "text/plain": [ "named integer(0)" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Total counts per sample. colSums() returns doubles, so a very deep library\n", "# cannot overflow to NA the way sum() over an integer column can.\n", "tt <- colSums(Matrix)\n", "# Samples whose total count is zero.\n", "del <- which(tt == 0)\n", "del" ] }, { "cell_type": "code", "execution_count": 6, "id": "excess-salvation", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
A data.frame: 6 × 1
gene_name
<chr>
5S_rRNA5S_rRNA
7SK7SK
A1BGA1BG
A1BG-AS1A1BG-AS1
A1CFA1CF
A2MA2M
\n" ], "text/latex": [ "A data.frame: 6 × 1\n", "\\begin{tabular}{r|l}\n", " & gene\\_name\\\\\n", " & \\\\\n", "\\hline\n", "\t5S\\_rRNA & 5S\\_rRNA \\\\\n", "\t7SK & 7SK \\\\\n", "\tA1BG & A1BG \\\\\n", "\tA1BG-AS1 & A1BG-AS1\\\\\n", "\tA1CF & A1CF \\\\\n", "\tA2M & A2M \\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A data.frame: 6 × 1\n", "\n", "| | gene_name <chr> |\n", "|---|---|\n", "| 5S_rRNA | 5S_rRNA |\n", "| 7SK | 7SK |\n", "| A1BG | A1BG |\n", "| A1BG-AS1 | A1BG-AS1 |\n", "| A1CF | A1CF |\n", "| A2M | A2M |\n", "\n" ], "text/plain": [ " gene_name\n", "5S_rRNA 5S_rRNA \n", "7SK 7SK \n", "A1BG A1BG \n", "A1BG-AS1 A1BG-AS1 \n", "A1CF A1CF \n", "A2M A2M " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Feature annotation: one row per gene, keyed by the same names as the count matrix rows.\n", "fdat_df <- data.frame(gene_name = rownames(Matrix),\n", "                      row.names = rownames(Matrix),\n", "                      stringsAsFactors = FALSE)\n", "# One description row per fdat_df column, keyed by the column name.\n", "metadata_fdat_df <- data.frame(labelDescription = \"gene name\", row.names = \"gene_name\")\n", "head(fdat_df)" ] }, { "cell_type": "code", "execution_count": 7, "id": "collaborative-inspiration", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
A data.frame: 6 × 2
sample_typecondition
<chr><chr>
AD_004_lesionalAD_L AD
AD_004_non-lesionalAD_NLAD
AD_005_lesionalAD_L AD
AD_005_non-lesionalAD_NLAD
AD_006_lesionalAD_L AD
AD_006_non-lesionalAD_NLAD
\n" ], "text/latex": [ "A data.frame: 6 × 2\n", "\\begin{tabular}{r|ll}\n", " & sample\\_type & condition\\\\\n", " & & \\\\\n", "\\hline\n", "\tAD\\_004\\_lesional & AD\\_L & AD\\\\\n", "\tAD\\_004\\_non-lesional & AD\\_NL & AD\\\\\n", "\tAD\\_005\\_lesional & AD\\_L & AD\\\\\n", "\tAD\\_005\\_non-lesional & AD\\_NL & AD\\\\\n", "\tAD\\_006\\_lesional & AD\\_L & AD\\\\\n", "\tAD\\_006\\_non-lesional & AD\\_NL & AD\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A data.frame: 6 × 2\n", "\n", "| | sample_type <chr> | condition <chr> |\n", "|---|---|---|\n", "| AD_004_lesional | AD_L | AD |\n", "| AD_004_non-lesional | AD_NL | AD |\n", "| AD_005_lesional | AD_L | AD |\n", "| AD_005_non-lesional | AD_NL | AD |\n", "| AD_006_lesional | AD_L | AD |\n", "| AD_006_non-lesional | AD_NL | AD |\n", "\n" ], "text/plain": [ " sample_type condition\n", "AD_004_lesional AD_L AD \n", "AD_004_non-lesional AD_NL AD \n", "AD_005_lesional AD_L AD \n", "AD_005_non-lesional AD_NL AD \n", "AD_006_lesional AD_L AD \n", "AD_006_non-lesional AD_NL AD " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Sample annotation, read as tab-separated text despite the .xls extension.\n", "# check.names = FALSE leaves the column headers exactly as written in the file.\n", "metadata <- read.table(\"/lustre/scratch117/cellgen/team205/rl20/CTCL/deconvolution/GSE121212_meta.xls\",\n", "                       sep = \"\\t\", header = TRUE, check.names = FALSE)\n", "head(metadata)" ] }, { "cell_type": "code", "execution_count": 8, "id": "declared-cookbook", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
A data.frame: 6 × 2
sample_typecondition
<chr><chr>
AD_004_lesionalAD_L AD
AD_004_non-lesionalAD_NLAD
AD_005_lesionalAD_L AD
AD_005_non-lesionalAD_NLAD
AD_006_lesionalAD_L AD
AD_006_non-lesionalAD_NLAD
\n" ], "text/latex": [ "A data.frame: 6 × 2\n", "\\begin{tabular}{r|ll}\n", " & sample\\_type & condition\\\\\n", " & & \\\\\n", "\\hline\n", "\tAD\\_004\\_lesional & AD\\_L & AD\\\\\n", "\tAD\\_004\\_non-lesional & AD\\_NL & AD\\\\\n", "\tAD\\_005\\_lesional & AD\\_L & AD\\\\\n", "\tAD\\_005\\_non-lesional & AD\\_NL & AD\\\\\n", "\tAD\\_006\\_lesional & AD\\_L & AD\\\\\n", "\tAD\\_006\\_non-lesional & AD\\_NL & AD\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A data.frame: 6 × 2\n", "\n", "| | sample_type <chr> | condition <chr> |\n", "|---|---|---|\n", "| AD_004_lesional | AD_L | AD |\n", "| AD_004_non-lesional | AD_NL | AD |\n", "| AD_005_lesional | AD_L | AD |\n", "| AD_005_non-lesional | AD_NL | AD |\n", "| AD_006_lesional | AD_L | AD |\n", "| AD_006_non-lesional | AD_NL | AD |\n", "\n" ], "text/plain": [ " sample_type condition\n", "AD_004_lesional AD_L AD \n", "AD_004_non-lesional AD_NL AD \n", "AD_005_lesional AD_L AD \n", "AD_005_non-lesional AD_NL AD \n", "AD_006_lesional AD_L AD \n", "AD_006_non-lesional AD_NL AD " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Per-sample phenotype table, keyed by the row names of the metadata table.\n", "pdat_df <- data.frame(sample_type = metadata$sample_type,\n", "                      condition = metadata$condition,\n", "                      row.names = rownames(metadata),\n", "                      stringsAsFactors = FALSE)\n", "head(pdat_df)" ] }, { "cell_type": "code", "execution_count": 9, "id": "following-sleeve", "metadata": {}, "outputs": [], "source": [ "# One description row per pdat_df column, keyed by the column name.\n", "metadata_pdat_df <- data.frame(labelDescription = c(\"lesion_nonlesion\", \"Condition\"),\n", "                               row.names = c(\"sample_type\", \"condition\"))" ] }, { "cell_type": "code", "execution_count": 10, "id": "racial-modem", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\n", "
A data.frame: 2 × 1
labelDescription
<chr>
sample_typelesion_nonlesion
conditionCondition
\n" ], "text/latex": [ "A data.frame: 2 × 1\n", "\\begin{tabular}{r|l}\n", " & labelDescription\\\\\n", " & \\\\\n", "\\hline\n", "\tsample\\_type & lesion\\_nonlesion\\\\\n", "\tcondition & Condition \\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A data.frame: 2 × 1\n", "\n", "| | labelDescription <chr> |\n", "|---|---|\n", "| sample_type | lesion_nonlesion |\n", "| condition | Condition |\n", "\n" ], "text/plain": [ " labelDescription\n", "sample_type lesion_nonlesion\n", "condition Condition " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "metadata_pdat_df" ] }, { "cell_type": "code", "execution_count": 11, "id": "pursuant-switch", "metadata": {}, "outputs": [], "source": [ "# Put the count columns in the same order as the phenotype rows; the two tables\n", "# may list samples in a different order. Selecting by name stops with an error\n", "# if a metadata sample has no column in the count matrix.\n", "Matrix <- Matrix[, rownames(pdat_df)]" ] }, { "cell_type": "code", "execution_count": 12, "id": "comfortable-prague", "metadata": {}, "outputs": [ { "data": { "text/html": [ "TRUE" ], "text/latex": [ "TRUE" ], "text/markdown": [ "TRUE" ], "text/plain": [ "[1] TRUE" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# TRUE only when sample names and their order agree exactly; unlike all.equal(),\n", "# identical() yields FALSE (not a text description) on a mismatch.\n", "identical(colnames(Matrix), rownames(pdat_df))" ] }, { "cell_type": "code", "execution_count": 13, "id": "ruled-spencer", "metadata": {}, "outputs": [], "source": [ "# Bundle the counts with the sample and gene annotation into one ExpressionSet.\n", "bulk_eset <- ExpressionSet(\n", "    assayData = data.matrix(Matrix),\n", "    phenoData = new(\"AnnotatedDataFrame\", data = pdat_df, varMetadata = metadata_pdat_df),\n", "    featureData = new(\"AnnotatedDataFrame\", data = fdat_df, varMetadata = metadata_fdat_df))" ] }, { "cell_type": "code", "execution_count": 14, "id": "renewable-mentor", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ExpressionSet (storageMode: lockedEnvironment)\n", "assayData: 31362 features, 147 samples \n", " element names: exprs \n", "protocolData: none\n", "phenoData\n", " sampleNames: AD_004_lesional AD_004_non-lesional ...\n", " AD_037_chronic_lesion (147 total)\n", " varLabels: sample_type condition\n", " 
varMetadata: labelDescription\n", "featureData\n", " featureNames: 5S_rRNA 7SK ... snoZ5 (31362 total)\n", " fvarLabels: gene_name\n", " fvarMetadata: labelDescription\n", "experimentData: use 'experimentData(object)'\n", "Annotation: " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "bulk_eset" ] }, { "cell_type": "code", "execution_count": 17, "id": "personalized-power", "metadata": {}, "outputs": [], "source": [ "saveRDS(bulk_eset, file=\"/lustre/scratch117/cellgen/team205/rl20/CTCL/deconvolution/GSE121212_readcount_ExpressionSet.rds\")" ] }, { "cell_type": "markdown", "id": "floral-undergraduate", "metadata": {}, "source": [ "### Single cell ExpressionSet" ] }, { "cell_type": "code", "execution_count": 2, "id": "passive-sponsorship", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Loading required package: reticulate\n", "\n", "Attaching SeuratObject\n", "\n", "\n", "Attaching package: ‘Seurat’\n", "\n", "\n", "The following object is masked from ‘package:SummarizedExperiment’:\n", "\n", " Assays\n", "\n", "\n", "\n", "Attaching package: ‘Matrix’\n", "\n", "\n", "The following object is masked from ‘package:S4Vectors’:\n", "\n", " expand\n", "\n", "\n" ] } ], "source": [ "library(sceasy)\n", "library(reticulate)\n", "library(anndata)\n", "library(Seurat)\n", "library(BisqueRNA)\n", "library(Biobase)\n", "library(Matrix)" ] }, { "cell_type": "code", "execution_count": 3, "id": "determined-permit", "metadata": {}, "outputs": [], "source": [ "h5ad_file <- \"/lustre/scratch126/cellgen/team205/rl20/CTCL/object_revision/All_samples_final_20240707_sub0.08_for_deconv.h5ad\"\n", "sdata <- read_h5ad(h5ad_file)\n", "# sdata$X is cells x genes, while Seurat expects genes x cells. Transposing the\n", "# sparse matrix directly avoids the multi-GiB dense copy that as.matrix() allocated.\n", "counts_sparse <- as(Matrix::t(sdata$X), \"CsparseMatrix\")\n", "seurat_object <- CreateSeuratObject(counts = counts_sparse, meta.data = sdata$obs)" ] }, { "cell_type": "code", "execution_count": 
4, "id": "criminal-manor", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "An object of class Seurat \n", "15777 features across 62199 samples within 1 assay \n", "Active assay: RNA (15777 features, 0 variable features)" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "seurat_object" ] }, { "cell_type": "code", "execution_count": 5, "id": "beginning-tongue", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " [[ suppressing 20 column names ‘AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1’, ‘AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1’, ‘AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1’ ... ]]\n", "\n" ] }, { "data": { "text/plain": [ "20 x 20 sparse Matrix of class \"dgCMatrix\"\n", " \n", "SAMD11 . . . . . . . . . . . . . . . . . . . .\n", "NOC2L 1 1 . . 2 . . 1 . . . 3 1 . . . . . . .\n", "KLHL17 . . . . . . . . . . . . . . . . . . . .\n", "PLEKHN1 2 . . . . . . . 1 . . 1 . . . . . . . .\n", "HES4 . . 1 . . . . . . . 2 . . . . . 1 . . .\n", "ISG15 . . . . . . . 1 . . . . . . . . . . . .\n", "AGRN . . . . . . . . . 1 . . . . 1 . . . . .\n", "RNF223 . . . . . . . . . . . . . . . . . . . .\n", "C1orf159 . . . . . . . . . . 1 . . . . . . . . .\n", "TTLL10 . . . . . . . . . . . . . . . . . . . .\n", "TNFRSF18 3 1 1 . 1 . . 5 . . . 1 3 . . 1 . . . .\n", "TNFRSF4 . . . . . . . 3 . 1 . . . . . . . . . .\n", "SDF4 6 1 5 1 1 3 . 2 . . . 3 . 2 2 1 1 . . .\n", "B3GALT6 . . . . . . . . . . . . . . . . . . . .\n", "C1QTNF12 . . 1 . . . . . . . . . . . 1 . . . 1 .\n", "UBE2J2 . 1 . . 3 . . 1 . . 4 1 . 1 . . 2 . . .\n", "SCNN1D . . 1 . . . . . . . . . . . . . . . . .\n", "ACAP3 . . . . . 1 . . . . . . . . . . . . . .\n", "PUSL1 . . . . . . . . 1 . . . . . . . . . . .\n", "INTS11 3 . . . . 1 . . . . 2 . . 2 1 1 . . . ." 
] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Raw count matrix (genes x cells), fetched through the Seurat accessor instead of\n", "# reaching into the object's internal slots.\n", "gene_exprs.matrix <- GetAssayData(seurat_object, assay = \"RNA\", slot = \"counts\")\n", "gene_exprs.matrix[1:20, 1:20]" ] }, { "cell_type": "code", "execution_count": 6, "id": "marine-philosophy", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
A data.frame: 6 × 22
orig.identnCount_RNAnFeature_RNAbatchn_countsn_genespercent_mitodonortechsample_typebroad_ctctpredicted_labelsctover_clusteringctmajority_votingctconf_scorecell_typesitetissueSextype
<fct><dbl><int><fct><dbl><dbl><dbl><fct><fct><fct><fct><fct><fct><fct><dbl><fct><fct><fct><fct><fct>
AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1SeuratProject14602314202757734740.0011241252CTCL110xCTCLKeratinocytesDifferentiated_KC*286Differentiated_KC 0.96744886Differentiated_KC lesionEpidermisFemaleNA
AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1SeuratProject13463331102102136420.0004281433CTCL110xCTCLKeratinocytesDifferentiated_KC*492Differentiated_KC*0.13851590Differentiated_KC*lesionEpidermisFemaleNA
AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1SeuratProject14551347202783738240.0024787153CTCL110xCTCLKeratinocytesDifferentiated_KC*535Differentiated_KC 0.01321663Differentiated_KC lesionEpidermisFemaleNA
AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1SeuratProject11215263201754529430.0056426334CTCL110xCTCLKeratinocytesDifferentiated_KC*298Differentiated_KC*0.90835925Differentiated_KC*lesionEpidermisFemaleNA
AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1SeuratProject 615023840 913626650.0010945710CTCL110xCTCLKeratinocytesVE2 368Differentiated_KC*0.04688790Differentiated_KC*lesionEpidermisFemaleNA
AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1SeuratProject 5296186001180321010.0155045325CTCL110xCTCLKeratinocytesDifferentiated_KC 438Differentiated_KC 0.67155350Differentiated_KC lesionEpidermisFemaleNA
\n" ], "text/latex": [ "A data.frame: 6 × 22\n", "\\begin{tabular}{r|lllllllllllllllllllll}\n", " & orig.ident & nCount\\_RNA & nFeature\\_RNA & batch & n\\_counts & n\\_genes & percent\\_mito & donor & tech & sample\\_type & ⋯ & broad\\_ct & ctpredicted\\_labels & ctover\\_clustering & ctmajority\\_voting & ctconf\\_score & cell\\_type & site & tissue & Sex & type\\\\\n", " & & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", "\\hline\n", "\tAAACCTGCATCACAAC-0\\_CTCL1\\_CTCL1\\_CTCL1 & SeuratProject & 14602 & 3142 & 0 & 27577 & 3474 & 0.0011241252 & CTCL1 & 10x & CTCL & ⋯ & Keratinocytes & Differentiated\\_KC* & 286 & Differentiated\\_KC & 0.96744886 & Differentiated\\_KC & lesion & Epidermis & Female & NA\\\\\n", "\tAAACCTGCATGTAAGA-0\\_CTCL1\\_CTCL1\\_CTCL1 & SeuratProject & 13463 & 3311 & 0 & 21021 & 3642 & 0.0004281433 & CTCL1 & 10x & CTCL & ⋯ & Keratinocytes & Differentiated\\_KC* & 492 & Differentiated\\_KC* & 0.13851590 & Differentiated\\_KC* & lesion & Epidermis & Female & NA\\\\\n", "\tAAACGGGGTCGACTGC-0\\_CTCL1\\_CTCL1\\_CTCL1 & SeuratProject & 14551 & 3472 & 0 & 27837 & 3824 & 0.0024787153 & CTCL1 & 10x & CTCL & ⋯ & Keratinocytes & Differentiated\\_KC* & 535 & Differentiated\\_KC & 0.01321663 & Differentiated\\_KC & lesion & Epidermis & Female & NA\\\\\n", "\tAAAGCAAAGCGTAATA-0\\_CTCL1\\_CTCL1\\_CTCL1 & SeuratProject & 11215 & 2632 & 0 & 17545 & 2943 & 0.0056426334 & CTCL1 & 10x & CTCL & ⋯ & Keratinocytes & Differentiated\\_KC* & 298 & Differentiated\\_KC* & 0.90835925 & Differentiated\\_KC* & lesion & Epidermis & Female & NA\\\\\n", "\tAAAGCAAGTCCTGCTT-0\\_CTCL1\\_CTCL1\\_CTCL1 & SeuratProject & 6150 & 2384 & 0 & 9136 & 2665 & 0.0010945710 & CTCL1 & 10x & CTCL & ⋯ & Keratinocytes & VE2 & 368 & Differentiated\\_KC* & 0.04688790 & Differentiated\\_KC* & lesion & Epidermis & Female & NA\\\\\n", "\tAAAGCAATCCCATTTA-0\\_CTCL1\\_CTCL1\\_CTCL1 & SeuratProject & 5296 & 1860 & 0 & 11803 & 2101 & 0.0155045325 & CTCL1 & 10x & CTCL & ⋯ & Keratinocytes & 
Differentiated\\_KC & 438 & Differentiated\\_KC & 0.67155350 & Differentiated\\_KC & lesion & Epidermis & Female & NA\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A data.frame: 6 × 22\n", "\n", "| | orig.ident <fct> | nCount_RNA <dbl> | nFeature_RNA <int> | batch <fct> | n_counts <dbl> | n_genes <dbl> | percent_mito <dbl> | donor <fct> | tech <fct> | sample_type <fct> | ⋯ ⋯ | broad_ct <fct> | ctpredicted_labels <fct> | ctover_clustering <fct> | ctmajority_voting <fct> | ctconf_score <dbl> | cell_type <fct> | site <fct> | tissue <fct> | Sex <fct> | type <fct> |\n", "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", "| AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 | SeuratProject | 14602 | 3142 | 0 | 27577 | 3474 | 0.0011241252 | CTCL1 | 10x | CTCL | ⋯ | Keratinocytes | Differentiated_KC* | 286 | Differentiated_KC | 0.96744886 | Differentiated_KC | lesion | Epidermis | Female | NA |\n", "| AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 | SeuratProject | 13463 | 3311 | 0 | 21021 | 3642 | 0.0004281433 | CTCL1 | 10x | CTCL | ⋯ | Keratinocytes | Differentiated_KC* | 492 | Differentiated_KC* | 0.13851590 | Differentiated_KC* | lesion | Epidermis | Female | NA |\n", "| AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 | SeuratProject | 14551 | 3472 | 0 | 27837 | 3824 | 0.0024787153 | CTCL1 | 10x | CTCL | ⋯ | Keratinocytes | Differentiated_KC* | 535 | Differentiated_KC | 0.01321663 | Differentiated_KC | lesion | Epidermis | Female | NA |\n", "| AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 | SeuratProject | 11215 | 2632 | 0 | 17545 | 2943 | 0.0056426334 | CTCL1 | 10x | CTCL | ⋯ | Keratinocytes | Differentiated_KC* | 298 | Differentiated_KC* | 0.90835925 | Differentiated_KC* | lesion | Epidermis | Female | NA |\n", "| AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 | SeuratProject | 6150 | 2384 | 0 | 9136 | 2665 | 0.0010945710 | CTCL1 | 10x | CTCL | ⋯ | Keratinocytes | VE2 | 368 | Differentiated_KC* | 0.04688790 | Differentiated_KC* | lesion | Epidermis | Female | NA 
|\n", "| AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 | SeuratProject | 5296 | 1860 | 0 | 11803 | 2101 | 0.0155045325 | CTCL1 | 10x | CTCL | ⋯ | Keratinocytes | Differentiated_KC | 438 | Differentiated_KC | 0.67155350 | Differentiated_KC | lesion | Epidermis | Female | NA |\n", "\n" ], "text/plain": [ " orig.ident nCount_RNA nFeature_RNA\n", "AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 SeuratProject 14602 3142 \n", "AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 SeuratProject 13463 3311 \n", "AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 SeuratProject 14551 3472 \n", "AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 SeuratProject 11215 2632 \n", "AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 SeuratProject 6150 2384 \n", "AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 SeuratProject 5296 1860 \n", " batch n_counts n_genes percent_mito donor\n", "AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 0 27577 3474 0.0011241252 CTCL1\n", "AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 0 21021 3642 0.0004281433 CTCL1\n", "AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 0 27837 3824 0.0024787153 CTCL1\n", "AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 0 17545 2943 0.0056426334 CTCL1\n", "AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 0 9136 2665 0.0010945710 CTCL1\n", "AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 0 11803 2101 0.0155045325 CTCL1\n", " tech sample_type ⋯ broad_ct \n", "AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 10x CTCL ⋯ Keratinocytes\n", "AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 10x CTCL ⋯ Keratinocytes\n", "AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 10x CTCL ⋯ Keratinocytes\n", "AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 10x CTCL ⋯ Keratinocytes\n", "AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 10x CTCL ⋯ Keratinocytes\n", "AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 10x CTCL ⋯ Keratinocytes\n", " ctpredicted_labels ctover_clustering\n", "AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 286 \n", "AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 492 \n", "AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 535 \n", "AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 298 \n", 
"AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 VE2 368 \n", "AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC 438 \n", " ctmajority_voting ctconf_score\n", "AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 Differentiated_KC 0.96744886 \n", "AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 0.13851590 \n", "AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 Differentiated_KC 0.01321663 \n", "AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 0.90835925 \n", "AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* 0.04688790 \n", "AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC 0.67155350 \n", " cell_type site tissue Sex \n", "AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 Differentiated_KC lesion Epidermis Female\n", "AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* lesion Epidermis Female\n", "AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 Differentiated_KC lesion Epidermis Female\n", "AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* lesion Epidermis Female\n", "AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 Differentiated_KC* lesion Epidermis Female\n", "AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 Differentiated_KC lesion Epidermis Female\n", " type\n", "AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 NA \n", "AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 NA \n", "AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 NA \n", "AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 NA \n", "AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 NA \n", "AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 NA " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "head(seurat_object@meta.data)" ] }, { "cell_type": "code", "execution_count": 7, "id": "sacred-retreat", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
A data.frame: 6 × 2
donorcell_type
<fct><fct>
AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1CTCL1Differentiated_KC
AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1CTCL1Differentiated_KC*
AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1CTCL1Differentiated_KC
AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1CTCL1Differentiated_KC*
AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1CTCL1Differentiated_KC*
AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1CTCL1Differentiated_KC
\n" ], "text/latex": [ "A data.frame: 6 × 2\n", "\\begin{tabular}{r|ll}\n", " & donor & cell\\_type\\\\\n", " & & \\\\\n", "\\hline\n", "\tAAACCTGCATCACAAC-0\\_CTCL1\\_CTCL1\\_CTCL1 & CTCL1 & Differentiated\\_KC \\\\\n", "\tAAACCTGCATGTAAGA-0\\_CTCL1\\_CTCL1\\_CTCL1 & CTCL1 & Differentiated\\_KC*\\\\\n", "\tAAACGGGGTCGACTGC-0\\_CTCL1\\_CTCL1\\_CTCL1 & CTCL1 & Differentiated\\_KC \\\\\n", "\tAAAGCAAAGCGTAATA-0\\_CTCL1\\_CTCL1\\_CTCL1 & CTCL1 & Differentiated\\_KC*\\\\\n", "\tAAAGCAAGTCCTGCTT-0\\_CTCL1\\_CTCL1\\_CTCL1 & CTCL1 & Differentiated\\_KC*\\\\\n", "\tAAAGCAATCCCATTTA-0\\_CTCL1\\_CTCL1\\_CTCL1 & CTCL1 & Differentiated\\_KC \\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A data.frame: 6 × 2\n", "\n", "| | donor <fct> | cell_type <fct> |\n", "|---|---|---|\n", "| AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 | CTCL1 | Differentiated_KC |\n", "| AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 | CTCL1 | Differentiated_KC* |\n", "| AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 | CTCL1 | Differentiated_KC |\n", "| AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 | CTCL1 | Differentiated_KC* |\n", "| AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 | CTCL1 | Differentiated_KC* |\n", "| AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 | CTCL1 | Differentiated_KC |\n", "\n" ], "text/plain": [ " donor cell_type \n", "AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1 CTCL1 Differentiated_KC \n", "AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 CTCL1 Differentiated_KC*\n", "AAACGGGGTCGACTGC-0_CTCL1_CTCL1_CTCL1 CTCL1 Differentiated_KC \n", "AAAGCAAAGCGTAATA-0_CTCL1_CTCL1_CTCL1 CTCL1 Differentiated_KC*\n", "AAAGCAAGTCCTGCTT-0_CTCL1_CTCL1_CTCL1 CTCL1 Differentiated_KC*\n", "AAAGCAATCCCATTTA-0_CTCL1_CTCL1_CTCL1 CTCL1 Differentiated_KC " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#pheno.matrix <- seurat_object@meta.data[,c(\"donor_id\",\"broad_cell_type\",\"anno\")]\n", "pheno.matrix <- seurat_object@meta.data[,c(\"donor\",\"cell_type\")]\n", "head(pheno.matrix)" ] }, { "cell_type": "code", "execution_count": 8, "id": 
"surface-nevada", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\n", "
A data.frame: 2 × 1
labelDescription
<chr>
donorDonor ID
cell_typeCell type
\n" ], "text/latex": [ "A data.frame: 2 × 1\n", "\\begin{tabular}{r|l}\n", " & labelDescription\\\\\n", " & \\\\\n", "\\hline\n", "\tdonor & Donor ID \\\\\n", "\tcell\\_type & Cell type\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A data.frame: 2 × 1\n", "\n", "| | labelDescription <chr> |\n", "|---|---|\n", "| donor | Donor ID |\n", "| cell_type | Cell type |\n", "\n" ], "text/plain": [ " labelDescription\n", "donor Donor ID \n", "cell_type Cell type " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# One row per pheno.matrix column (donor, cell_type); labelDescription holds its human-readable label\n", "metadata <- data.frame(labelDescription= c(\"Donor ID\", \"Cell type\"), \n", " row.names=c(\"donor\", \"cell_type\"))\n", "metadata" ] }, { "cell_type": "code", "execution_count": 9, "id": "deadly-venture", "metadata": {}, "outputs": [], "source": [ "library(Matrix) \n", "#expr_matrix <- integrated@assays$RNA@counts #sparsematrix\n", "# Keep only features (rows) that have at least one stored entry in the sparse matrix.\n", "# nbins is pinned to nrow(): by default tabulate() stops at the largest row index it sees,\n", "# so trailing empty rows would make the logical index shorter than the matrix and misalign it with the rows.\n", "gene_exprs.matrix <- gene_exprs.matrix[tabulate(summary(gene_exprs.matrix)$i, nbins = nrow(gene_exprs.matrix)) != 0, , drop = FALSE] #remove any feature without a single count\n", "#expr_matrix <- as.matrix(gene_exprs.matrix) #convert to matrix" ] }, { "cell_type": "code", "execution_count": 10, "id": "medical-settlement", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
  1. 15767
  2. 62199
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 15767\n", "\\item 62199\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 15767\n", "2. 62199\n", "\n", "\n" ], "text/plain": [ "[1] 15767 62199" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "dim(gene_exprs.matrix)" ] }, { "cell_type": "code", "execution_count": 28, "id": "dated-injection", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "n_slices=1\n", "\n", "converting slice 1/1\n", "\n", "columns 1:55815\n", "\n", "cbind dense submatrices\n", "\n" ] } ], "source": [ "# Slice-wise sparse -> dense conversion of the filtered matrix.\n", "# NOTE(review): in the cells visible here expr_matrix_den is only used by the commented-out ExpressionSet call in the next cell - confirm it is still needed.\n", "expr_matrix_den <- SCOPfunctions::utils_big_as.matrix(gene_exprs.matrix, n_slices_init = 1, verbose = TRUE) ## for large matrix" ] }, { "cell_type": "code", "execution_count": 11, "id": "conscious-terrorist", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Warning message in asMethod(object):\n", "“sparse->dense coercion: allocating vector of size 7.3 GiB”\n" ] } ], "source": [ "# Build the ExpressionSet: expression matrix as assayData, pheno.matrix (donor, cell_type) as phenoData, metadata as its varMetadata.\n", "# data.matrix() coerces the sparse matrix to dense here (about 7.3 GiB in the recorded run).\n", "SC.eset <- ExpressionSet(assayData = data.matrix(gene_exprs.matrix), \n", " phenoData = new(\"AnnotatedDataFrame\", data = pheno.matrix, varMetadata = metadata))\n", "#SC.eset = ExpressionSet(assayData = expr_matrix_den, \n", "# phenoData = new(\"AnnotatedDataFrame\", data = pheno.matrix, varMetadata = metadata)) ## for large matrix" ] }, { "cell_type": "code", "execution_count": 12, "id": "final-hardwood", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ExpressionSet (storageMode: lockedEnvironment)\n", "assayData: 15767 features, 62199 samples \n", " element names: exprs \n", "protocolData: none\n", "phenoData\n", " sampleNames: AAACCTGCATCACAAC-0_CTCL1_CTCL1_CTCL1\n", " AAACCTGCATGTAAGA-0_CTCL1_CTCL1_CTCL1 ... 
GACGTGCTCACATACG-92_S5\n", " (62199 total)\n", " varLabels: donor cell_type\n", " varMetadata: labelDescription\n", "featureData: none\n", "experimentData: use 'experimentData(object)'\n", "Annotation: " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "SC.eset" ] }, { "cell_type": "code", "execution_count": null, "id": "minimal-raising", "metadata": {}, "outputs": [], "source": [ "# Serialize the SC.eset ExpressionSet to an RDS file\n", "saveRDS(\n", "    object = SC.eset,\n", "    file = \"/lustre/scratch126/cellgen/team205/rl20/CTCL/object_revision/All_samples_final_20240707_sub0.08_for_deconv_ExpressionSet.rds\"\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "enormous-helping", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "R", "language": "R", "name": "ir" }, "language_info": { "codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "4.0.4" } }, "nbformat": 4, "nbformat_minor": 5 }