152 lines (151 with data), 4.7 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pylab inline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from IPython.display import display\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load the read-count table. After .T the count categories (looked up below as\n",
"# '<type>.mapped' / '<type>.unmapped') are the columns of `counts`.\n",
"# Rows are presumably one per sample -- TODO confirm against read_counts.txt.\n",
"counts = pd.read_table('read_counts.txt', index_col=0).T\n",
"\n",
"# RNA categories to report, listed explicitly in display order.\n",
"rna_types = 'univec,spikein,rRNA,miRNA,piRNA,Y_RNA,srpRNA,tRNA,snRNA,snoRNA,lncRNA,mRNA,tucpRNA'.split(',')\n",
"# Non-gene categories; their counts are subtracted from 'other' to obtain 'unannotated'.\n",
"nongene_types = ['intron', 'promoter', 'enhancer', 'repeats']\n",
"\n",
"def gradient_func(val):\n",
"    \"\"\"Format a numeric cell as an HTML span with a bar-like gradient background.\n",
"\n",
"    The value is inserted as the gradient stop (in percent) and is also shown\n",
"    as the span text with three decimal places.\n",
"    \"\"\"\n",
"    cell_template = '<span style=\"background: linear-gradient(90deg, #d65f5f {0}%, transparent 0%)\">{0:.3f}</span>'\n",
"    return cell_template.format(val)\n",
"\n",
"# Pick the columns to report, in order: clean reads, every listed category that is\n",
"# present in the table, then the 'other' bucket.\n",
"columns = ['clean.unmapped']\n",
"rna_types_enabled = []\n",
"for rna_type in rna_types + nongene_types:\n",
"    if rna_type + '.mapped' in counts.columns:\n",
"        columns.append(rna_type + '.mapped')\n",
"        rna_types_enabled.append(rna_type)\n",
"rna_types = rna_types_enabled\n",
"columns += ['other.mapped']\n",
"counts_unmapped = counts.loc[:, 'other.unmapped']\n",
"# Strip the trailing '.mapped' / '.unmapped' suffix. The group keeps '$' anchored to both\n",
"# alternatives, and rename() returns a new frame instead of mutating the .loc selection.\n",
"suffix_pattern = r'\\.(mapped|unmapped)$'\n",
"counts = counts.loc[:, columns].rename(columns={key: re.sub(suffix_pattern, '', key) for key in columns})\n",
"# Subtract only the non-gene categories that were found above; selecting a label\n",
"# that is missing from the table with .loc would raise KeyError.\n",
"nongene_types_enabled = [name for name in nongene_types if name in rna_types_enabled]\n",
"counts['unannotated'] = counts['other'].subtract(counts.loc[:, nongene_types_enabled].sum(axis=1), axis=0)\n",
"counts['unmapped'] = counts_unmapped\n",
"# Alternative definition of 'unmapped' (disabled):\n",
"#counts['unmapped'] = counts['clean'].subtract(counts.loc[:, rna_types + ['other']].sum(axis=1), axis=0)\n",
"counts = counts.loc[:, ['clean'] + rna_types_enabled + ['unannotated', 'unmapped']]\n",
"display(counts.style.set_caption('Read counts'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Express every column as a percentage of the row's 'clean' count, then drop the\n",
"# 'clean' column itself from the result.\n",
"clean_totals = counts.loc[:, 'clean']\n",
"percent_by_clean = (100 * counts.div(clean_totals, axis=0)).drop(columns='clean')\n",
"percent_by_clean_styled = percent_by_clean.style.set_caption('Reads percentage by clean reads')\n",
"display(percent_by_clean_styled.format(gradient_func))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Column-wise mean of the per-row percentages, shown as a one-row table.\n",
"mean_percent_by_clean = percent_by_clean.mean(axis=0).to_frame().T\n",
"mean_percent_by_clean_styled = mean_percent_by_clean.style.set_caption('Average reads percentage by clean reads')\n",
"display(mean_percent_by_clean_styled.format(gradient_func))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Columns excluded from the denominator (and from the output) when they exist in the table.\n",
"rna_types_remove = [name for name in ['univec', 'rRNA', 'spikein', 'unmapped'] if name in counts.columns]\n",
"# Denominator: 'clean' minus the sum of the excluded columns, per row.\n",
"mapped_totals = counts['clean'] - counts.loc[:, rna_types_remove].sum(axis=1)\n",
"percent_by_mapped = (100 * counts.div(mapped_totals, axis=0)).drop(columns=['clean'] + rna_types_remove)\n",
"percent_by_mapped_styled = percent_by_mapped.style.set_caption('Reads percentage by mapped')\n",
"display(percent_by_mapped_styled.format(gradient_func))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Column-wise mean of the per-row percentages, shown as a one-row table.\n",
"display(percent_by_mapped.mean(axis=0).to_frame().T.style \\\n",
"    .set_caption('Average reads percentage by mapped reads') \\\n",
"    .format(gradient_func))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}