2901 lines (2900 with data), 266.3 kB
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Aims**:\n",
" - list high-impact works to aid navigation of the field\n",
" - check for unexpectedly common authors/affiliations/journals to screen for potential false-positive matches (see the Integromics and Panomics companies)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%run notebook_setup.ipynb"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"Imported:\n",
"\n",
" - `literature` (904B0F94)\n",
" - `affiliations` (E06399F2)\n",
" - `authors` (DC49BC74)\n",
" - `publication_types` (7DD4E741)\n",
"\n",
"at Wednesday, 05. Aug 2020 16:22"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {
"text/markdown": {
"action": "import",
"command": "from pubmed_derived_data import literature, affiliations, authors, publication_types",
"finished": "2020-08-05T16:22:34.123010",
"finished_human_readable": "Wednesday, 05. Aug 2020 16:22",
"result": [
{
"new_file": {
"crc32": "904B0F94",
"sha256": "A2EFC068A287A3B724AE4B320EE5356E1E99474BD08A2E2A3EBA34CD0194F23B"
},
"subject": "literature"
},
{
"new_file": {
"crc32": "E06399F2",
"sha256": "8DD13D4B7CF3D2E314BBC4E051AEDBF21414371F42BB4D100D7721B5F4D24E60"
},
"subject": "affiliations"
},
{
"new_file": {
"crc32": "DC49BC74",
"sha256": "237BEFD0FDA68E2A155B9EC00519017B4C9BC92BD2AA3D10E058A013EC0DE1D9"
},
"subject": "authors"
},
{
"new_file": {
"crc32": "7DD4E741",
"sha256": "BD0EBF88B38BB9E0E44923E2CB473A532AEFBFFC6A7FCC02926290CAD2615150"
},
"subject": "publication_types"
}
],
"started": "2020-08-05T16:22:30.319280"
}
},
"output_type": "display_data"
}
],
"source": [
"%vault from pubmed_derived_data import literature, affiliations, authors, publication_types"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"Imported:\n",
"\n",
" - `web_of_science_journals` (E95CE31E)\n",
" - `scimago_by_issn` (DDCBFB24)\n",
"\n",
"at Wednesday, 05. Aug 2020 16:22"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {
"text/markdown": {
"action": "import",
"command": "from journals_data import web_of_science_journals, scimago_by_issn",
"finished": "2020-08-05T16:22:36.022687",
"finished_human_readable": "Wednesday, 05. Aug 2020 16:22",
"result": [
{
"new_file": {
"crc32": "E95CE31E",
"sha256": "55F51248C28FEEC07B4E5A98AD3660519AD3566DC9B61985279E6D4C9B374BF8"
},
"subject": "web_of_science_journals"
},
{
"new_file": {
"crc32": "DDCBFB24",
"sha256": "B16E18A78F3247A03950A39AB7B64E92EAFA747074BB6B2DBFEBDA7DCA5902D3"
},
"subject": "scimago_by_issn"
}
],
"started": "2020-08-05T16:22:34.145419"
}
},
"output_type": "display_data"
}
],
"source": [
"%vault from journals_data import web_of_science_journals, scimago_by_issn"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Annotate every paper with the `Rank` of its journal, looked up in\n",
"# `scimago_by_issn` through the paper's ISSN.\n",
"literature['journal_sjr_rank'] = (\n",
"    literature['journal_issn']\n",
"    # Hyphens are stripped before the lookup (presumably the index of\n",
"    # `scimago_by_issn` holds hyphen-less ISSNs - TODO confirm).\n",
"    # `regex=False` keeps this a literal replacement regardless of the\n",
"    # default of the installed pandas version.\n",
"    .str.replace('-', '', regex=False)\n",
"    # Papers without an ISSN get the placeholder '-', i.e. the one character\n",
"    # that was just stripped from all real ISSNs.\n",
"    .fillna('-')\n",
"    # One dictionary lookup per paper; ISSNs that are not among the keys\n",
"    # (including the placeholder) map to NaN.\n",
"    # NOTE(review): if the index holds duplicate ISSNs the last entry wins -\n",
"    # confirm that the index is unique.\n",
"    .map(scimago_by_issn['Rank'].to_dict())\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## A quick overview/hot-takes"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"columns_to_show = ['title', 'journal', 'doi', 'journal_sjr_rank']"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def display_sorted(data):\n",
"    \"\"\"Return the `columns_to_show` subset of `data`, ordered for display.\n",
"\n",
"    Rows are sorted by `journal_sjr_rank` first and by `title` within equal\n",
"    ranks, using the default (ascending) order of `sort_values`.\n",
"    \"\"\"\n",
"    selected = data[columns_to_show]\n",
"    sort_keys = ['journal_sjr_rank', 'title']\n",
"    return selected.sort_values(by=sort_keys)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Benchmarks"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title</th>\n",
" <th>journal</th>\n",
" <th>doi</th>\n",
" <th>journal_sjr_rank</th>\n",
" </tr>\n",
" <tr>\n",
" <th>uid</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>30295871</th>\n",
" <td>Multi-omic and multi-view clustering algorithm...</td>\n",
" <td>Nucleic acids research</td>\n",
" <td>10.1093/nar/gky889</td>\n",
" <td>90.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30496480</th>\n",
" <td>Multi-omic and multi-view clustering algorithm...</td>\n",
" <td>Nucleic acids research</td>\n",
" <td>10.1093/nar/gky1226</td>\n",
" <td>90.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22121217</th>\n",
" <td>The Stem Cell Discovery Engine: an integrated ...</td>\n",
" <td>Nucleic acids research</td>\n",
" <td>10.1093/nar/gkr1051</td>\n",
" <td>90.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32234303</th>\n",
" <td>Multiomics Evaluation of Gastrointestinal and ...</td>\n",
" <td>Gastroenterology</td>\n",
" <td>10.1053/j.gastro.2020.03.045</td>\n",
" <td>169.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31154149</th>\n",
" <td>Quantitative CMR population imaging on 20,000 ...</td>\n",
" <td>Medical image analysis</td>\n",
" <td>10.1016/j.media.2019.05.006</td>\n",
" <td>409.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30068331</th>\n",
" <td>Species comparison of liver proteomes reveals ...</td>\n",
" <td>BMC biology</td>\n",
" <td>10.1186/s12915-018-0547-y</td>\n",
" <td>447.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32437529</th>\n",
" <td>Integrating multi-OMICS data through sparse Ca...</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" <td>10.1093/bioinformatics/btaa530</td>\n",
" <td>484.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31792509</th>\n",
" <td>Clustering and variable selection evaluation o...</td>\n",
" <td>Briefings in bioinformatics</td>\n",
" <td>10.1093/bib/bbz138</td>\n",
" <td>625.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29688321</th>\n",
" <td>Comparison and evaluation of integrative metho...</td>\n",
" <td>Briefings in bioinformatics</td>\n",
" <td>10.1093/bib/bby027</td>\n",
" <td>625.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31220206</th>\n",
" <td>Evaluation of integrative clustering methods f...</td>\n",
" <td>Briefings in bioinformatics</td>\n",
" <td>10.1093/bib/bbz015</td>\n",
" <td>625.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29272335</th>\n",
" <td>Multi-omics integration-a comparison of unsupe...</td>\n",
" <td>Briefings in bioinformatics</td>\n",
" <td>10.1093/bib/bbx167</td>\n",
" <td>625.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30368064</th>\n",
" <td>Multi-omics at single-cell resolution: compari...</td>\n",
" <td>Current opinion in biotechnology</td>\n",
" <td>10.1016/j.copbio.2018.09.012</td>\n",
" <td>740.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32255618</th>\n",
" <td>Evaluation of Microbiome-Host Relationships in...</td>\n",
" <td>Environmental science & technology</td>\n",
" <td>10.1021/acs.est.0c00628</td>\n",
" <td>797.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25414848</th>\n",
" <td>Multi-omic landscape of rheumatoid arthritis: ...</td>\n",
" <td>Frontiers in cell and developmental biology</td>\n",
" <td>10.3389/fcell.2014.00059</td>\n",
" <td>862.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31292535</th>\n",
" <td>Multi-omic molecular comparison of primary ver...</td>\n",
" <td>British journal of cancer</td>\n",
" <td>10.1038/s41416-019-0507-5</td>\n",
" <td>941.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30866779</th>\n",
" <td>Multi-omics comparisons of p-aminosalicylic ac...</td>\n",
" <td>Emerging microbes & infections</td>\n",
" <td>10.1080/22221751.2019.1568179</td>\n",
" <td>1103.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22954204</th>\n",
" <td>Systematic comparison of reverse phase and hyd...</td>\n",
" <td>Analytical chemistry</td>\n",
" <td>10.1021/ac3012494</td>\n",
" <td>1178.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24216987</th>\n",
" <td>Metabolomic Dynamic Analysis of Hypoxia in MDA...</td>\n",
" <td>Cancers</td>\n",
" <td>10.3390/cancers5020491</td>\n",
" <td>1388.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29936248</th>\n",
" <td>U-BIOPRED: evaluation of the value of a public...</td>\n",
" <td>Drug discovery today</td>\n",
" <td>10.1016/j.drudis.2018.06.015</td>\n",
" <td>1444.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31312416</th>\n",
" <td>An Evaluation of Machine Learning Approaches f...</td>\n",
" <td>Computational and structural biotechnology jou...</td>\n",
" <td>10.1016/j.csbj.2019.05.008</td>\n",
" <td>1637.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29213276</th>\n",
" <td>An Integrated \"Multi-Omics\" Comparison of Embr...</td>\n",
" <td>Frontiers in plant science</td>\n",
" <td>10.3389/fpls.2017.01984</td>\n",
" <td>1794.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29740416</th>\n",
" <td>SplinectomeR Enables Group Comparisons in Long...</td>\n",
" <td>Frontiers in microbiology</td>\n",
" <td>10.3389/fmicb.2018.00785</td>\n",
" <td>1798.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29920461</th>\n",
" <td>Multi 'omics comparison reveals metabolome bio...</td>\n",
" <td>The Science of the total environment</td>\n",
" <td>10.1016/j.scitotenv.2018.05.256</td>\n",
" <td>1854.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29212468</th>\n",
" <td>A comparison of graph- and kernel-based -omics...</td>\n",
" <td>BMC bioinformatics</td>\n",
" <td>10.1186/s12859-017-1982-4</td>\n",
" <td>1930.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30234027</th>\n",
" <td>Comparison of Fecal Collection Methods for Mic...</td>\n",
" <td>Frontiers in cellular and infection microbiology</td>\n",
" <td>10.3389/fcimb.2018.00301</td>\n",
" <td>1931.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31792061</th>\n",
" <td>Evaluation of colorectal cancer subtypes and c...</td>\n",
" <td>Life science alliance</td>\n",
" <td>10.26508/lsa.201900517</td>\n",
" <td>2050.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31649733</th>\n",
" <td>CEPICS: A Comparison and Evaluation Platform f...</td>\n",
" <td>Frontiers in genetics</td>\n",
" <td>10.3389/fgene.2019.00966</td>\n",
" <td>2313.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27929400</th>\n",
" <td>A Systematic Evaluation of Blood Serum and Pla...</td>\n",
" <td>International journal of molecular sciences</td>\n",
" <td>NaN</td>\n",
" <td>2808.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32386347</th>\n",
" <td>Comparison of Proteomic Assessment Methods in ...</td>\n",
" <td>Proteomics</td>\n",
" <td>10.1002/pmic.201900278</td>\n",
" <td>2944.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22796353</th>\n",
" <td>Effects of pre-storage leukoreduction on store...</td>\n",
" <td>Journal of proteomics</td>\n",
" <td>10.1016/j.jprot.2012.06.032</td>\n",
" <td>3300.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32252356</th>\n",
" <td>Deep Functional Profiling Facilitates the Eval...</td>\n",
" <td>Antibiotics (Basel, Switzerland)</td>\n",
" <td>10.3390/antibiotics9040157</td>\n",
" <td>3371.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30596412</th>\n",
" <td>Growth Performance and Meat Quality Evaluation...</td>\n",
" <td>Journal of agricultural and food chemistry</td>\n",
" <td>10.1021/acs.jafc.8b05477</td>\n",
" <td>3803.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28927277</th>\n",
" <td>Benchmark Dose Modeling Estimates of the Conce...</td>\n",
" <td>Chemical research in toxicology</td>\n",
" <td>10.1021/acs.chemrestox.7b00221</td>\n",
" <td>4330.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31942259</th>\n",
" <td>A multiomics comparison between endometrial ca...</td>\n",
" <td>PeerJ</td>\n",
" <td>10.7717/peerj.8347</td>\n",
" <td>4381.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28804556</th>\n",
" <td>Anti-tumor efficacy evaluation of a novel mono...</td>\n",
" <td>American journal of translational research</td>\n",
" <td>NaN</td>\n",
" <td>4480.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21081137</th>\n",
" <td>Cross-study and cross-omics comparisons of thr...</td>\n",
" <td>Toxicology and applied pharmacology</td>\n",
" <td>10.1016/j.taap.2010.11.006</td>\n",
" <td>4714.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28546903</th>\n",
" <td>Comprehensive reconstruction and evaluation of...</td>\n",
" <td>Bioresources and bioprocessing</td>\n",
" <td>10.1186/s40643-017-0152-x</td>\n",
" <td>5398.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26495307</th>\n",
" <td>Cross-omics comparison of stress responses in ...</td>\n",
" <td>BioMed research international</td>\n",
" <td>10.1155/2015/628158</td>\n",
" <td>7157.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28521615</th>\n",
" <td>Comprehensive Evaluation of (+)-Usnic Acid-ind...</td>\n",
" <td>Toxicologic pathology</td>\n",
" <td>10.1177/0192623317707074</td>\n",
" <td>7826.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17785938</th>\n",
" <td>Evaluation of human hepatocyte chimeric mice a...</td>\n",
" <td>The Journal of toxicological sciences</td>\n",
" <td>NaN</td>\n",
" <td>8472.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31486672</th>\n",
" <td>: A Novel Bayesian Network Structural Learning...</td>\n",
" <td>Journal of computational biology : a journal o...</td>\n",
" <td>10.1089/cmb.2019.0210</td>\n",
" <td>8932.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28994524</th>\n",
" <td>[Clinical value evaluation of Chinese herbal f...</td>\n",
" <td>Zhongguo Zhong yao za zhi = Zhongguo zhongyao ...</td>\n",
" <td>10.19540/j.cnki.cjcmm.20170103.001</td>\n",
" <td>21244.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21765119</th>\n",
" <td>A comparison of the cyclic variation in serum ...</td>\n",
" <td>Biological research for nursing</td>\n",
" <td>10.1177/1099800411412766</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24598031</th>\n",
" <td>A cross-omics toxicological evaluation of drin...</td>\n",
" <td>Journal of hazardous materials</td>\n",
" <td>10.1016/j.jhazmat.2014.02.007</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31344359</th>\n",
" <td>Before and After: Comparison of Legacy and Har...</td>\n",
" <td>Cell systems</td>\n",
" <td>10.1016/j.cels.2019.06.006</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32585153</th>\n",
" <td>Evaluation of Bunina et al.: Synthesizing Mult...</td>\n",
" <td>Cell systems</td>\n",
" <td>10.1016/j.cels.2020.06.002</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32156385</th>\n",
" <td>Foodomics evaluation of the anti-proliferative...</td>\n",
" <td>Food research international (Ottawa, Ont.)</td>\n",
" <td>10.1016/j.foodres.2019.108938</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32117598</th>\n",
" <td>Multiomics Evaluation of Human Fat-Derived Mes...</td>\n",
" <td>BioResearch open access</td>\n",
" <td>10.1089/biores.2020.0005</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" title \\\n",
"uid \n",
"30295871 Multi-omic and multi-view clustering algorithm... \n",
"30496480 Multi-omic and multi-view clustering algorithm... \n",
"22121217 The Stem Cell Discovery Engine: an integrated ... \n",
"32234303 Multiomics Evaluation of Gastrointestinal and ... \n",
"31154149 Quantitative CMR population imaging on 20,000 ... \n",
"30068331 Species comparison of liver proteomes reveals ... \n",
"32437529 Integrating multi-OMICS data through sparse Ca... \n",
"31792509 Clustering and variable selection evaluation o... \n",
"29688321 Comparison and evaluation of integrative metho... \n",
"31220206 Evaluation of integrative clustering methods f... \n",
"29272335 Multi-omics integration-a comparison of unsupe... \n",
"30368064 Multi-omics at single-cell resolution: compari... \n",
"32255618 Evaluation of Microbiome-Host Relationships in... \n",
"25414848 Multi-omic landscape of rheumatoid arthritis: ... \n",
"31292535 Multi-omic molecular comparison of primary ver... \n",
"30866779 Multi-omics comparisons of p-aminosalicylic ac... \n",
"22954204 Systematic comparison of reverse phase and hyd... \n",
"24216987 Metabolomic Dynamic Analysis of Hypoxia in MDA... \n",
"29936248 U-BIOPRED: evaluation of the value of a public... \n",
"31312416 An Evaluation of Machine Learning Approaches f... \n",
"29213276 An Integrated \"Multi-Omics\" Comparison of Embr... \n",
"29740416 SplinectomeR Enables Group Comparisons in Long... \n",
"29920461 Multi 'omics comparison reveals metabolome bio... \n",
"29212468 A comparison of graph- and kernel-based -omics... \n",
"30234027 Comparison of Fecal Collection Methods for Mic... \n",
"31792061 Evaluation of colorectal cancer subtypes and c... \n",
"31649733 CEPICS: A Comparison and Evaluation Platform f... \n",
"27929400 A Systematic Evaluation of Blood Serum and Pla... \n",
"32386347 Comparison of Proteomic Assessment Methods in ... \n",
"22796353 Effects of pre-storage leukoreduction on store... \n",
"32252356 Deep Functional Profiling Facilitates the Eval... \n",
"30596412 Growth Performance and Meat Quality Evaluation... \n",
"28927277 Benchmark Dose Modeling Estimates of the Conce... \n",
"31942259 A multiomics comparison between endometrial ca... \n",
"28804556 Anti-tumor efficacy evaluation of a novel mono... \n",
"21081137 Cross-study and cross-omics comparisons of thr... \n",
"28546903 Comprehensive reconstruction and evaluation of... \n",
"26495307 Cross-omics comparison of stress responses in ... \n",
"28521615 Comprehensive Evaluation of (+)-Usnic Acid-ind... \n",
"17785938 Evaluation of human hepatocyte chimeric mice a... \n",
"31486672 : A Novel Bayesian Network Structural Learning... \n",
"28994524 [Clinical value evaluation of Chinese herbal f... \n",
"21765119 A comparison of the cyclic variation in serum ... \n",
"24598031 A cross-omics toxicological evaluation of drin... \n",
"31344359 Before and After: Comparison of Legacy and Har... \n",
"32585153 Evaluation of Bunina et al.: Synthesizing Mult... \n",
"32156385 Foodomics evaluation of the anti-proliferative... \n",
"32117598 Multiomics Evaluation of Human Fat-Derived Mes... \n",
"\n",
" journal \\\n",
"uid \n",
"30295871 Nucleic acids research \n",
"30496480 Nucleic acids research \n",
"22121217 Nucleic acids research \n",
"32234303 Gastroenterology \n",
"31154149 Medical image analysis \n",
"30068331 BMC biology \n",
"32437529 Bioinformatics (Oxford, England) \n",
"31792509 Briefings in bioinformatics \n",
"29688321 Briefings in bioinformatics \n",
"31220206 Briefings in bioinformatics \n",
"29272335 Briefings in bioinformatics \n",
"30368064 Current opinion in biotechnology \n",
"32255618 Environmental science & technology \n",
"25414848 Frontiers in cell and developmental biology \n",
"31292535 British journal of cancer \n",
"30866779 Emerging microbes & infections \n",
"22954204 Analytical chemistry \n",
"24216987 Cancers \n",
"29936248 Drug discovery today \n",
"31312416 Computational and structural biotechnology jou... \n",
"29213276 Frontiers in plant science \n",
"29740416 Frontiers in microbiology \n",
"29920461 The Science of the total environment \n",
"29212468 BMC bioinformatics \n",
"30234027 Frontiers in cellular and infection microbiology \n",
"31792061 Life science alliance \n",
"31649733 Frontiers in genetics \n",
"27929400 International journal of molecular sciences \n",
"32386347 Proteomics \n",
"22796353 Journal of proteomics \n",
"32252356 Antibiotics (Basel, Switzerland) \n",
"30596412 Journal of agricultural and food chemistry \n",
"28927277 Chemical research in toxicology \n",
"31942259 PeerJ \n",
"28804556 American journal of translational research \n",
"21081137 Toxicology and applied pharmacology \n",
"28546903 Bioresources and bioprocessing \n",
"26495307 BioMed research international \n",
"28521615 Toxicologic pathology \n",
"17785938 The Journal of toxicological sciences \n",
"31486672 Journal of computational biology : a journal o... \n",
"28994524 Zhongguo Zhong yao za zhi = Zhongguo zhongyao ... \n",
"21765119 Biological research for nursing \n",
"24598031 Journal of hazardous materials \n",
"31344359 Cell systems \n",
"32585153 Cell systems \n",
"32156385 Food research international (Ottawa, Ont.) \n",
"32117598 BioResearch open access \n",
"\n",
" doi journal_sjr_rank \n",
"uid \n",
"30295871 10.1093/nar/gky889 90.0 \n",
"30496480 10.1093/nar/gky1226 90.0 \n",
"22121217 10.1093/nar/gkr1051 90.0 \n",
"32234303 10.1053/j.gastro.2020.03.045 169.0 \n",
"31154149 10.1016/j.media.2019.05.006 409.0 \n",
"30068331 10.1186/s12915-018-0547-y 447.0 \n",
"32437529 10.1093/bioinformatics/btaa530 484.0 \n",
"31792509 10.1093/bib/bbz138 625.0 \n",
"29688321 10.1093/bib/bby027 625.0 \n",
"31220206 10.1093/bib/bbz015 625.0 \n",
"29272335 10.1093/bib/bbx167 625.0 \n",
"30368064 10.1016/j.copbio.2018.09.012 740.0 \n",
"32255618 10.1021/acs.est.0c00628 797.0 \n",
"25414848 10.3389/fcell.2014.00059 862.0 \n",
"31292535 10.1038/s41416-019-0507-5 941.0 \n",
"30866779 10.1080/22221751.2019.1568179 1103.0 \n",
"22954204 10.1021/ac3012494 1178.0 \n",
"24216987 10.3390/cancers5020491 1388.0 \n",
"29936248 10.1016/j.drudis.2018.06.015 1444.0 \n",
"31312416 10.1016/j.csbj.2019.05.008 1637.0 \n",
"29213276 10.3389/fpls.2017.01984 1794.0 \n",
"29740416 10.3389/fmicb.2018.00785 1798.0 \n",
"29920461 10.1016/j.scitotenv.2018.05.256 1854.0 \n",
"29212468 10.1186/s12859-017-1982-4 1930.0 \n",
"30234027 10.3389/fcimb.2018.00301 1931.0 \n",
"31792061 10.26508/lsa.201900517 2050.0 \n",
"31649733 10.3389/fgene.2019.00966 2313.0 \n",
"27929400 NaN 2808.0 \n",
"32386347 10.1002/pmic.201900278 2944.0 \n",
"22796353 10.1016/j.jprot.2012.06.032 3300.0 \n",
"32252356 10.3390/antibiotics9040157 3371.0 \n",
"30596412 10.1021/acs.jafc.8b05477 3803.0 \n",
"28927277 10.1021/acs.chemrestox.7b00221 4330.0 \n",
"31942259 10.7717/peerj.8347 4381.0 \n",
"28804556 NaN 4480.0 \n",
"21081137 10.1016/j.taap.2010.11.006 4714.0 \n",
"28546903 10.1186/s40643-017-0152-x 5398.0 \n",
"26495307 10.1155/2015/628158 7157.0 \n",
"28521615 10.1177/0192623317707074 7826.0 \n",
"17785938 NaN 8472.0 \n",
"31486672 10.1089/cmb.2019.0210 8932.0 \n",
"28994524 10.19540/j.cnki.cjcmm.20170103.001 21244.0 \n",
"21765119 10.1177/1099800411412766 NaN \n",
"24598031 10.1016/j.jhazmat.2014.02.007 NaN \n",
"31344359 10.1016/j.cels.2019.06.006 NaN \n",
"32585153 10.1016/j.cels.2020.06.002 NaN \n",
"32156385 10.1016/j.foodres.2019.108938 NaN \n",
"32117598 10.1089/biores.2020.0005 NaN "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Papers whose lower-cased title contains any of the benchmark-related keywords.\n",
"literature[\n",
"    literature.title.str.lower().str.contains(\n",
"        '|'.join(['benchmark', 'evaluation', 'comparison']),\n",
"        # a missing title counts as a non-match; without this the mask would\n",
"        # hold NaN and boolean indexing would raise\n",
"        na=False\n",
"    )\n",
"].pipe(display_sorted)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Biomarkers"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title</th>\n",
" <th>journal</th>\n",
" <th>doi</th>\n",
" <th>journal_sjr_rank</th>\n",
" </tr>\n",
" <tr>\n",
" <th>uid</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>31563876</th>\n",
" <td>Exploiting differential Wnt target gene expres...</td>\n",
" <td>Gut</td>\n",
" <td>10.1136/gutjnl-2019-319126</td>\n",
" <td>121.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28838933</th>\n",
" <td>Epigenome-Wide Association Study Identifies Ca...</td>\n",
" <td>Circulation</td>\n",
" <td>10.1161/CIRCULATIONAHA.117.027355</td>\n",
" <td>142.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24859455</th>\n",
" <td>Identification of prognostic biomarkers in hep...</td>\n",
" <td>Journal of hepatology</td>\n",
" <td>10.1016/j.jhep.2014.05.025</td>\n",
" <td>171.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31501510</th>\n",
" <td>Multi-omic biomarker identification and valida...</td>\n",
" <td>Molecular psychiatry</td>\n",
" <td>10.1038/s41380-019-0496-z</td>\n",
" <td>187.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31312043</th>\n",
" <td>Multi-omics in IBD biomarker discovery: the mi...</td>\n",
" <td>Nature reviews. Gastroenterology & hepatology</td>\n",
" <td>10.1038/s41575-019-0188-9</td>\n",
" <td>196.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29764059</th>\n",
" <td>Leveraging next-generation phenotyping and pan...</td>\n",
" <td>Personalized medicine</td>\n",
" <td>10.2217/pme.14.6</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31882086</th>\n",
" <td>Meta-proteomics for the discovery of protein b...</td>\n",
" <td>Food research international (Ottawa, Ont.)</td>\n",
" <td>10.1016/j.foodres.2019.108739</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29781548</th>\n",
" <td>Multi-omics in high-grade serous ovarian cance...</td>\n",
" <td>American journal of reproductive immunology (N...</td>\n",
" <td>10.1111/aji.12975</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29686467</th>\n",
" <td>Multiomics biomarkers for the prediction of no...</td>\n",
" <td>World journal of gastroenterology</td>\n",
" <td>10.3748/wjg.v24.i15.1601</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26312246</th>\n",
" <td>Wading through the noise of \"multi-omics\" to i...</td>\n",
" <td>Hepatobiliary surgery and nutrition</td>\n",
" <td>10.3978/j.issn.2304-3881.2015.04.05</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>141 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" title \\\n",
"uid \n",
"31563876 Exploiting differential Wnt target gene expres... \n",
"28838933 Epigenome-Wide Association Study Identifies Ca... \n",
"24859455 Identification of prognostic biomarkers in hep... \n",
"31501510 Multi-omic biomarker identification and valida... \n",
"31312043 Multi-omics in IBD biomarker discovery: the mi... \n",
"... ... \n",
"29764059 Leveraging next-generation phenotyping and pan... \n",
"31882086 Meta-proteomics for the discovery of protein b... \n",
"29781548 Multi-omics in high-grade serous ovarian cance... \n",
"29686467 Multiomics biomarkers for the prediction of no... \n",
"26312246 Wading through the noise of \"multi-omics\" to i... \n",
"\n",
" journal \\\n",
"uid \n",
"31563876 Gut \n",
"28838933 Circulation \n",
"24859455 Journal of hepatology \n",
"31501510 Molecular psychiatry \n",
"31312043 Nature reviews. Gastroenterology & hepatology \n",
"... ... \n",
"29764059 Personalized medicine \n",
"31882086 Food research international (Ottawa, Ont.) \n",
"29781548 American journal of reproductive immunology (N... \n",
"29686467 World journal of gastroenterology \n",
"26312246 Hepatobiliary surgery and nutrition \n",
"\n",
" doi journal_sjr_rank \n",
"uid \n",
"31563876 10.1136/gutjnl-2019-319126 121.0 \n",
"28838933 10.1161/CIRCULATIONAHA.117.027355 142.0 \n",
"24859455 10.1016/j.jhep.2014.05.025 171.0 \n",
"31501510 10.1038/s41380-019-0496-z 187.0 \n",
"31312043 10.1038/s41575-019-0188-9 196.0 \n",
"... ... ... \n",
"29764059 10.2217/pme.14.6 NaN \n",
"31882086 10.1016/j.foodres.2019.108739 NaN \n",
"29781548 10.1111/aji.12975 NaN \n",
"29686467 10.3748/wjg.v24.i15.1601 NaN \n",
"26312246 10.3978/j.issn.2304-3881.2015.04.05 NaN \n",
"\n",
"[141 rows x 4 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Papers whose lower-cased title contains the biomarker keyword.\n",
"literature[\n",
"    literature.title.str.lower().str.contains(\n",
"        '|'.join(['biomarker']),\n",
"        # a missing title counts as a non-match; without this the mask would\n",
"        # hold NaN and boolean indexing would raise\n",
"        na=False\n",
"    )\n",
"].pipe(display_sorted)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title</th>\n",
" <th>journal</th>\n",
" <th>doi</th>\n",
" <th>journal_sjr_rank</th>\n",
" </tr>\n",
" <tr>\n",
" <th>uid</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>29479082</th>\n",
" <td>Integrative omics for health and disease.</td>\n",
" <td>Nature reviews. Genetics</td>\n",
" <td>10.1038/nrg.2018.4</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31925408</th>\n",
" <td>Multi-omics shows the (default) way.</td>\n",
" <td>Nature reviews. Genetics</td>\n",
" <td>10.1038/s41576-020-0211-6</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28970588</th>\n",
" <td>Technique: CRISPR CAPTURE for multi-omic probi...</td>\n",
" <td>Nature reviews. Genetics</td>\n",
" <td>10.1038/nrg.2017.79</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31491384</th>\n",
" <td>Antibiotics-Driven Gut Microbiome Perturbation...</td>\n",
" <td>Cell</td>\n",
" <td>10.1016/j.cell.2019.08.010</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29100071</th>\n",
" <td>Cancer Evolution during Immunotherapy.</td>\n",
" <td>Cell</td>\n",
" <td>10.1016/j.cell.2017.10.027</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30595449</th>\n",
" <td>Gene Regulatory Programs Conferring Phenotypic...</td>\n",
" <td>Cell</td>\n",
" <td>10.1016/j.cell.2018.11.045</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27863251</th>\n",
" <td>Genetic Drivers of Epigenetic and Transcriptio...</td>\n",
" <td>Cell</td>\n",
" <td>10.1016/j.cell.2016.10.026</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31585088</th>\n",
" <td>Integrated Proteogenomic Characterization of H...</td>\n",
" <td>Cell</td>\n",
" <td>10.1016/j.cell.2019.08.052</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32059783</th>\n",
" <td>Lymphoma Driver Mutations in the Pathogenic Ev...</td>\n",
" <td>Cell</td>\n",
" <td>10.1016/j.cell.2020.01.029</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27912059</th>\n",
" <td>Microbiota Diurnal Rhythmicity Programs Host T...</td>\n",
" <td>Cell</td>\n",
" <td>10.1016/j.cell.2016.11.003</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32470399</th>\n",
" <td>Molecular Choreography of Acute Exercise.</td>\n",
" <td>Cell</td>\n",
" <td>10.1016/j.cell.2020.04.043</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32589957</th>\n",
" <td>Molecular Transducers of Physical Activity Con...</td>\n",
" <td>Cell</td>\n",
" <td>10.1016/j.cell.2020.06.004</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32579974</th>\n",
" <td>Multimodal Analysis of Composition and Spatial...</td>\n",
" <td>Cell</td>\n",
" <td>10.1016/j.cell.2020.05.039</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30193112</th>\n",
" <td>Personalized Gut Mucosal Colonization Resistan...</td>\n",
" <td>Cell</td>\n",
" <td>10.1016/j.cell.2018.08.041</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32649874</th>\n",
" <td>Proteogenomic Characterization Reveals Therape...</td>\n",
" <td>Cell</td>\n",
" <td>10.1016/j.cell.2020.06.013</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32059776</th>\n",
" <td>Proteogenomic Characterization of Endometrial ...</td>\n",
" <td>Cell</td>\n",
" <td>10.1016/j.cell.2020.01.026</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32649875</th>\n",
" <td>Proteogenomics of Non-smoking Lung Cancer in E...</td>\n",
" <td>Cell</td>\n",
" <td>10.1016/j.cell.2020.06.012</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29677503</th>\n",
" <td>Revolutionizing Precision Oncology through Col...</td>\n",
" <td>Cell</td>\n",
" <td>10.1016/j.cell.2018.04.008</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29328914</th>\n",
" <td>Rewiring of the Fruit Metabolome in Tomato Bre...</td>\n",
" <td>Cell</td>\n",
" <td>10.1016/j.cell.2017.12.019</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31178122</th>\n",
" <td>Single-Cell Multi-omic Integration Compares an...</td>\n",
" <td>Cell</td>\n",
" <td>10.1016/j.cell.2019.05.006</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" title \\\n",
"uid \n",
"29479082 Integrative omics for health and disease. \n",
"31925408 Multi-omics shows the (default) way. \n",
"28970588 Technique: CRISPR CAPTURE for multi-omic probi... \n",
"31491384 Antibiotics-Driven Gut Microbiome Perturbation... \n",
"29100071 Cancer Evolution during Immunotherapy. \n",
"30595449 Gene Regulatory Programs Conferring Phenotypic... \n",
"27863251 Genetic Drivers of Epigenetic and Transcriptio... \n",
"31585088 Integrated Proteogenomic Characterization of H... \n",
"32059783 Lymphoma Driver Mutations in the Pathogenic Ev... \n",
"27912059 Microbiota Diurnal Rhythmicity Programs Host T... \n",
"32470399 Molecular Choreography of Acute Exercise. \n",
"32589957 Molecular Transducers of Physical Activity Con... \n",
"32579974 Multimodal Analysis of Composition and Spatial... \n",
"30193112 Personalized Gut Mucosal Colonization Resistan... \n",
"32649874 Proteogenomic Characterization Reveals Therape... \n",
"32059776 Proteogenomic Characterization of Endometrial ... \n",
"32649875 Proteogenomics of Non-smoking Lung Cancer in E... \n",
"29677503 Revolutionizing Precision Oncology through Col... \n",
"29328914 Rewiring of the Fruit Metabolome in Tomato Bre... \n",
"31178122 Single-Cell Multi-omic Integration Compares an... \n",
"\n",
" journal doi \\\n",
"uid \n",
"29479082 Nature reviews. Genetics 10.1038/nrg.2018.4 \n",
"31925408 Nature reviews. Genetics 10.1038/s41576-020-0211-6 \n",
"28970588 Nature reviews. Genetics 10.1038/nrg.2017.79 \n",
"31491384 Cell 10.1016/j.cell.2019.08.010 \n",
"29100071 Cell 10.1016/j.cell.2017.10.027 \n",
"30595449 Cell 10.1016/j.cell.2018.11.045 \n",
"27863251 Cell 10.1016/j.cell.2016.10.026 \n",
"31585088 Cell 10.1016/j.cell.2019.08.052 \n",
"32059783 Cell 10.1016/j.cell.2020.01.029 \n",
"27912059 Cell 10.1016/j.cell.2016.11.003 \n",
"32470399 Cell 10.1016/j.cell.2020.04.043 \n",
"32589957 Cell 10.1016/j.cell.2020.06.004 \n",
"32579974 Cell 10.1016/j.cell.2020.05.039 \n",
"30193112 Cell 10.1016/j.cell.2018.08.041 \n",
"32649874 Cell 10.1016/j.cell.2020.06.013 \n",
"32059776 Cell 10.1016/j.cell.2020.01.026 \n",
"32649875 Cell 10.1016/j.cell.2020.06.012 \n",
"29677503 Cell 10.1016/j.cell.2018.04.008 \n",
"29328914 Cell 10.1016/j.cell.2017.12.019 \n",
"31178122 Cell 10.1016/j.cell.2019.05.006 \n",
"\n",
" journal_sjr_rank \n",
"uid \n",
"29479082 7.0 \n",
"31925408 7.0 \n",
"28970588 7.0 \n",
"31491384 8.0 \n",
"29100071 8.0 \n",
"30595449 8.0 \n",
"27863251 8.0 \n",
"31585088 8.0 \n",
"32059783 8.0 \n",
"27912059 8.0 \n",
"32470399 8.0 \n",
"32589957 8.0 \n",
"32579974 8.0 \n",
"30193112 8.0 \n",
"32649874 8.0 \n",
"32059776 8.0 \n",
"32649875 8.0 \n",
"29677503 8.0 \n",
"29328914 8.0 \n",
"31178122 8.0 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"literature.pipe(display_sorted).head(20)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Discoveries?"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title</th>\n",
" <th>journal</th>\n",
" <th>doi</th>\n",
" <th>journal_sjr_rank</th>\n",
" </tr>\n",
" <tr>\n",
" <th>uid</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>32649874</th>\n",
" <td>Proteogenomic Characterization Reveals Therape...</td>\n",
" <td>Cell</td>\n",
" <td>10.1016/j.cell.2020.06.013</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29328921</th>\n",
" <td>Tomato Multiomics Reveals Consequences of Crop...</td>\n",
" <td>Cell</td>\n",
" <td>10.1016/j.cell.2017.12.036</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28285833</th>\n",
" <td>Integrative Proteomics and Phosphoproteomics P...</td>\n",
" <td>Immunity</td>\n",
" <td>10.1016/j.immuni.2017.02.010</td>\n",
" <td>50.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29898900</th>\n",
" <td>Principled multi-omic analysis reveals gene re...</td>\n",
" <td>Genome research</td>\n",
" <td>10.1101/gr.227066.117</td>\n",
" <td>84.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32182340</th>\n",
" <td>Multi-omic analysis of gametogenesis reveals a...</td>\n",
" <td>Nucleic acids research</td>\n",
" <td>10.1093/nar/gkaa163</td>\n",
" <td>90.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31227589</th>\n",
" <td>Integrated multiomic analysis reveals comprehe...</td>\n",
" <td>Gut</td>\n",
" <td>10.1136/gutjnl-2019-318912</td>\n",
" <td>121.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30268436</th>\n",
" <td>A Pan-Cancer Analysis Reveals High-Frequency G...</td>\n",
" <td>Cell systems</td>\n",
" <td>10.1016/j.cels.2018.08.010</td>\n",
" <td>131.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28544881</th>\n",
" <td>Mammalian Systems Biotechnology Reveals Global...</td>\n",
" <td>Cell systems</td>\n",
" <td>10.1016/j.cels.2017.04.009</td>\n",
" <td>131.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26446169</th>\n",
" <td>A Cross-Species Analysis in Pancreatic Neuroen...</td>\n",
" <td>Cancer discovery</td>\n",
" <td>10.1158/2159-8290.CD-15-0068</td>\n",
" <td>137.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29588317</th>\n",
" <td>Spatiotemporal Multi-Omics Mapping Generates a...</td>\n",
" <td>Circulation</td>\n",
" <td>10.1161/CIRCULATIONAHA.117.032291</td>\n",
" <td>142.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27132591</th>\n",
" <td>Integrative Multi-omic Analysis of Human Plate...</td>\n",
" <td>American journal of human genetics</td>\n",
" <td>10.1016/j.ajhg.2016.03.007</td>\n",
" <td>143.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32023468</th>\n",
" <td>Inverse Data-Driven Modeling and Multiomics An...</td>\n",
" <td>Cell reports</td>\n",
" <td>10.1016/j.celrep.2020.01.011</td>\n",
" <td>203.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32522993</th>\n",
" <td>A multi-omics analysis reveals the unfolded pr...</td>\n",
" <td>Nature communications</td>\n",
" <td>10.1038/s41467-020-16747-y</td>\n",
" <td>238.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30692544</th>\n",
" <td>Deconvolution of single-cell multi-omics layer...</td>\n",
" <td>Nature communications</td>\n",
" <td>10.1038/s41467-018-08205-7</td>\n",
" <td>238.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31604924</th>\n",
" <td>Integrated molecular characterization of chond...</td>\n",
" <td>Nature communications</td>\n",
" <td>10.1038/s41467-019-12525-7</td>\n",
" <td>238.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29500431</th>\n",
" <td>Integrative analysis of omics summary data rev...</td>\n",
" <td>Nature communications</td>\n",
" <td>10.1038/s41467-018-03371-0</td>\n",
" <td>238.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29615613</th>\n",
" <td>Multi-omics analysis reveals neoantigen-indepe...</td>\n",
" <td>Nature communications</td>\n",
" <td>10.1038/s41467-018-03730-x</td>\n",
" <td>238.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29713003</th>\n",
" <td>Multi-omics profiling of younger Asian breast ...</td>\n",
" <td>Nature communications</td>\n",
" <td>10.1038/s41467-018-04129-4</td>\n",
" <td>238.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32312967</th>\n",
" <td>Nitrogen limitation reveals large reserves in ...</td>\n",
" <td>Nature communications</td>\n",
" <td>10.1038/s41467-020-15749-0</td>\n",
" <td>238.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28408661</th>\n",
" <td>Multi-Omics of Tomato Glandular Trichomes Reve...</td>\n",
" <td>The Plant cell</td>\n",
" <td>10.1105/tpc.17.00060</td>\n",
" <td>254.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" title \\\n",
"uid \n",
"32649874 Proteogenomic Characterization Reveals Therape... \n",
"29328921 Tomato Multiomics Reveals Consequences of Crop... \n",
"28285833 Integrative Proteomics and Phosphoproteomics P... \n",
"29898900 Principled multi-omic analysis reveals gene re... \n",
"32182340 Multi-omic analysis of gametogenesis reveals a... \n",
"31227589 Integrated multiomic analysis reveals comprehe... \n",
"30268436 A Pan-Cancer Analysis Reveals High-Frequency G... \n",
"28544881 Mammalian Systems Biotechnology Reveals Global... \n",
"26446169 A Cross-Species Analysis in Pancreatic Neuroen... \n",
"29588317 Spatiotemporal Multi-Omics Mapping Generates a... \n",
"27132591 Integrative Multi-omic Analysis of Human Plate... \n",
"32023468 Inverse Data-Driven Modeling and Multiomics An... \n",
"32522993 A multi-omics analysis reveals the unfolded pr... \n",
"30692544 Deconvolution of single-cell multi-omics layer... \n",
"31604924 Integrated molecular characterization of chond... \n",
"29500431 Integrative analysis of omics summary data rev... \n",
"29615613 Multi-omics analysis reveals neoantigen-indepe... \n",
"29713003 Multi-omics profiling of younger Asian breast ... \n",
"32312967 Nitrogen limitation reveals large reserves in ... \n",
"28408661 Multi-Omics of Tomato Glandular Trichomes Reve... \n",
"\n",
" journal \\\n",
"uid \n",
"32649874 Cell \n",
"29328921 Cell \n",
"28285833 Immunity \n",
"29898900 Genome research \n",
"32182340 Nucleic acids research \n",
"31227589 Gut \n",
"30268436 Cell systems \n",
"28544881 Cell systems \n",
"26446169 Cancer discovery \n",
"29588317 Circulation \n",
"27132591 American journal of human genetics \n",
"32023468 Cell reports \n",
"32522993 Nature communications \n",
"30692544 Nature communications \n",
"31604924 Nature communications \n",
"29500431 Nature communications \n",
"29615613 Nature communications \n",
"29713003 Nature communications \n",
"32312967 Nature communications \n",
"28408661 The Plant cell \n",
"\n",
" doi journal_sjr_rank \n",
"uid \n",
"32649874 10.1016/j.cell.2020.06.013 8.0 \n",
"29328921 10.1016/j.cell.2017.12.036 8.0 \n",
"28285833 10.1016/j.immuni.2017.02.010 50.0 \n",
"29898900 10.1101/gr.227066.117 84.0 \n",
"32182340 10.1093/nar/gkaa163 90.0 \n",
"31227589 10.1136/gutjnl-2019-318912 121.0 \n",
"30268436 10.1016/j.cels.2018.08.010 131.0 \n",
"28544881 10.1016/j.cels.2017.04.009 131.0 \n",
"26446169 10.1158/2159-8290.CD-15-0068 137.0 \n",
"29588317 10.1161/CIRCULATIONAHA.117.032291 142.0 \n",
"27132591 10.1016/j.ajhg.2016.03.007 143.0 \n",
"32023468 10.1016/j.celrep.2020.01.011 203.0 \n",
"32522993 10.1038/s41467-020-16747-y 238.0 \n",
"30692544 10.1038/s41467-018-08205-7 238.0 \n",
"31604924 10.1038/s41467-019-12525-7 238.0 \n",
"29500431 10.1038/s41467-018-03371-0 238.0 \n",
"29615613 10.1038/s41467-018-03730-x 238.0 \n",
"29713003 10.1038/s41467-018-04129-4 238.0 \n",
"32312967 10.1038/s41467-020-15749-0 238.0 \n",
"28408661 10.1105/tpc.17.00060 254.0 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"literature[literature.title.str.lower().str.contains('|'.join(['reveals']))].pipe(display_sorted).head(20)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Affiliations"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Most authors with given affiliation on papers:"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Affiliation</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Department of Genetics, Stanford University School of Medicine, Stanford, CA, USA.</th>\n",
" <td>58</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Tohoku Medical Megabank Organization, Tohoku University, Sendai, Japan.</th>\n",
" <td>48</td>\n",
" </tr>\n",
" <tr>\n",
" <th>College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, China.</th>\n",
" <td>46</td>\n",
" </tr>\n",
" <tr>\n",
" <th>The European Molecular Biology Laboratory, The European Bioinformatics Institute, The Wellcome Trust Genome Campus, Hinxton, Cambridgeshire CB10 1SD, UK.</th>\n",
" <td>39</td>\n",
" </tr>\n",
" <tr>\n",
" <th>College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, Heilongjiang 150086, China.</th>\n",
" <td>32</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Affiliation\n",
"index \n",
"Department of Genetics, Stanford University Sch... 58\n",
"Tohoku Medical Megabank Organization, Tohoku Un... 48\n",
"College of Bioinformatics Science and Technolog... 46\n",
"The European Molecular Biology Laboratory, The ... 39\n",
"College of Bioinformatics Science and Technolog... 32"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"affiliations.Affiliation.sorted_value_counts().head(5).to_frame()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Most papers with given affiliation:"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Affiliation</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>M&M Medical BioInformatics, Hongo 113-0033, Japan.</th>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, China.</th>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Department of Genetics, Stanford University School of Medicine, Stanford, CA, USA.</th>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>University of Chinese Academy of Sciences, Beijing 100049, China.</th>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Institute for Systems Biology, Seattle, WA, USA.</th>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Luxembourg Centre for Systems Biomedicine, University of Luxembourg, Esch-sur-Alzette, Luxembourg.</th>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Mosaiques Diagnostics GmbH, Hannover, Germany.</th>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Biological Sciences Division, Pacific Northwest National Laboratory, Richland, WA, USA.</th>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Blavatnik School of Computer Science, Tel Aviv University, Tel Aviv, Israel.</th>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, Heilongjiang 150086, China.</th>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Affiliation\n",
"index \n",
"M&M Medical BioInformatics, Hongo 113-0033, Japan. 16\n",
"College of Bioinformatics Science and Technolog... 7\n",
"Department of Genetics, Stanford University Sch... 7\n",
"University of Chinese Academy of Sciences, Beij... 7\n",
"Institute for Systems Biology, Seattle, WA, USA. 5\n",
"Luxembourg Centre for Systems Biomedicine, Univ... 5\n",
"Mosaiques Diagnostics GmbH, Hannover, Germany. 5\n",
"Biological Sciences Division, Pacific Northwest... 4\n",
"Blavatnik School of Computer Science, Tel Aviv ... 4\n",
"College of Bioinformatics Science and Technolog... 4"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"affiliations[['Affiliation', 'PMID']].drop_duplicates().Affiliation.sorted_value_counts().head(10).to_frame()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We were previously getting false hits because we were matching by affiliations:\n",
" - \"Multi-Omics Based Creative Drug Research Team, Kyungpook National University, Daegu 41566, Republic of Korea\"\n",
" - \"Panomics, Inc\"\n",
" \n",
"so it is important to check if no affiliations overrepresented. Would need a cleanup to be more reliable (not a priority)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Authors"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: not neccessarily unique persons, adoption of ORCID still low:"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>JointName</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Masaru Katoh</th>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Richard D Smith</th>\n",
" <td>19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Paul Wilmes</th>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jens Nielsen</th>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Young-Mo Kim</th>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Thomas O Metz</th>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Xia Li</th>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bernhard O Palsson</th>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Yuriko Katoh</th>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bing Zhang</th>\n",
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>James G Wilson</th>\n",
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Li Wang</th>\n",
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Carrie D Nicora</th>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Claudio Angione</th>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jerome I Rotter</th>\n",
" <td>12</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" JointName\n",
"index \n",
"Masaru Katoh 26\n",
"Richard D Smith 19\n",
"Paul Wilmes 18\n",
"Jens Nielsen 17\n",
"Young-Mo Kim 17\n",
"Thomas O Metz 16\n",
"Xia Li 16\n",
"Bernhard O Palsson 14\n",
"Yuriko Katoh 14\n",
"Bing Zhang 13\n",
"James G Wilson 13\n",
"Li Wang 13\n",
"Carrie D Nicora 12\n",
"Claudio Angione 12\n",
"Jerome I Rotter 12"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"authors['JointName'].sorted_value_counts().head(15).to_frame()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Publication kind and type"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"index\n",
"article 3453\n",
"article in book 3\n",
"Name: kind, dtype: int64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"literature.kind.sorted_value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"744"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sum(literature['Is Review'] == True)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>count</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Journal Article</th>\n",
" <td>3370</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Research Support, Non-U.S. Gov't</th>\n",
" <td>1371</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Review</th>\n",
" <td>744</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Research Support, N.I.H., Extramural</th>\n",
" <td>460</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Research Support, U.S. Gov't, Non-P.H.S.</th>\n",
" <td>161</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Comparative Study</th>\n",
" <td>61</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Editorial</th>\n",
" <td>44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Comment</th>\n",
" <td>37</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Clinical Trial</th>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Published Erratum</th>\n",
" <td>23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Multicenter Study</th>\n",
" <td>21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Research Support, N.I.H., Intramural</th>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Evaluation Study</th>\n",
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Letter</th>\n",
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Case Reports</th>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Dataset</th>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Introductory Journal Article</th>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Observational Study</th>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Twin Study</th>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Validation Study</th>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>English Abstract</th>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Randomized Controlled Trial</th>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Systematic Review</th>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Video-Audio Media</th>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Meta-Analysis</th>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Research Support, U.S. Gov't, P.H.S.</th>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Congress</th>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Interview</th>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>News</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Clinical Trial, Phase II</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Consensus Development Conference, NIH</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Controlled Clinical Trial</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Historical Article</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Practice Guideline</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Preprint</th>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" count\n",
"index \n",
"Journal Article 3370\n",
"Research Support, Non-U.S. Gov't 1371\n",
"Review 744\n",
"Research Support, N.I.H., Extramural 460\n",
"Research Support, U.S. Gov't, Non-P.H.S. 161\n",
"Comparative Study 61\n",
"Editorial 44\n",
"Comment 37\n",
"Clinical Trial 26\n",
"Published Erratum 23\n",
"Multicenter Study 21\n",
"Research Support, N.I.H., Intramural 16\n",
"Evaluation Study 13\n",
"Letter 13\n",
"Case Reports 9\n",
"Dataset 9\n",
"Introductory Journal Article 7\n",
"Observational Study 7\n",
"Twin Study 7\n",
"Validation Study 7\n",
"English Abstract 6\n",
"Randomized Controlled Trial 6\n",
"Systematic Review 6\n",
"Video-Audio Media 6\n",
"Meta-Analysis 5\n",
"Research Support, U.S. Gov't, P.H.S. 5\n",
"Congress 4\n",
"Interview 3\n",
"News 2\n",
"Clinical Trial, Phase II 1\n",
"Consensus Development Conference, NIH 1\n",
"Controlled Clinical Trial 1\n",
"Historical Article 1\n",
"Practice Guideline 1\n",
"Preprint 1"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"publication_types['0'].sorted_value_counts().to_frame('count')"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title</th>\n",
" <th>doi</th>\n",
" </tr>\n",
" <tr>\n",
" <th>uid</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>21889780</th>\n",
" <td>[OMICS and biomarkers of glial tumors].</td>\n",
" <td>10.1016/j.neurol.2011.07.007</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22490743</th>\n",
" <td>[Application of an integrated omics analysis f...</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24292186</th>\n",
" <td>[Biomarker exploration and its clinical use].</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25744640</th>\n",
" <td>[Gut microbiota, host defense and immunity: an...</td>\n",
" <td>10.2177/jsci.37.403</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26351165</th>\n",
" <td>[Identification of disease targets for precisi...</td>\n",
" <td>10.16288/j.yczz.15-061</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32694108</th>\n",
" <td>[Comprehensive re-annotation of protein-coding...</td>\n",
" <td>10.16288/j.yczz.20-022</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" title \\\n",
"uid \n",
"21889780 [OMICS and biomarkers of glial tumors]. \n",
"22490743 [Application of an integrated omics analysis f... \n",
"24292186 [Biomarker exploration and its clinical use]. \n",
"25744640 [Gut microbiota, host defense and immunity: an... \n",
"26351165 [Identification of disease targets for precisi... \n",
"32694108 [Comprehensive re-annotation of protein-coding... \n",
"\n",
" doi \n",
"uid \n",
"21889780 10.1016/j.neurol.2011.07.007 \n",
"22490743 NaN \n",
"24292186 NaN \n",
"25744640 10.2177/jsci.37.403 \n",
"26351165 10.16288/j.yczz.15-061 \n",
"32694108 10.16288/j.yczz.20-022 "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"literature[literature['Is English Abstract'] == True][['title', 'doi']]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title</th>\n",
" <th>doi</th>\n",
" </tr>\n",
" <tr>\n",
" <th>uid</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>22424393</th>\n",
" <td>Q & A: the Snyderome.</td>\n",
" <td>10.1186/gb-2012-13-3-147</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31429661</th>\n",
" <td>Interview with Prof. K. Yalçın Arga: A Pioneer...</td>\n",
" <td>10.1089/omi.2019.0131</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31755849</th>\n",
" <td>Interview with Prof. Collet Dandara: A Pioneer...</td>\n",
" <td>10.1089/omi.2019.0174</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" title \\\n",
"uid \n",
"22424393 Q & A: the Snyderome. \n",
"31429661 Interview with Prof. K. Yalçın Arga: A Pioneer... \n",
"31755849 Interview with Prof. Collet Dandara: A Pioneer... \n",
"\n",
" doi \n",
"uid \n",
"22424393 10.1186/gb-2012-13-3-147 \n",
"31429661 10.1089/omi.2019.0131 \n",
"31755849 10.1089/omi.2019.0174 "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"literature[literature['Is Interview'] == True][['title', 'doi']]"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title</th>\n",
" <th>doi</th>\n",
" </tr>\n",
" <tr>\n",
" <th>uid</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>12186644</th>\n",
" <td>Integromics: challenges in data integration.</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27164162</th>\n",
" <td>Emergence of Biomolecular Pathways to Define N...</td>\n",
" <td>10.1165/rcmb.2016-0141PS</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29339647</th>\n",
" <td>Proceedings of the 11th Congress of the Intern...</td>\n",
" <td>10.1159/000485799</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31394729</th>\n",
" <td>Novel Strategies for Cancer Treatment: Highlig...</td>\n",
" <td>10.3390/cancers11081125</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" title \\\n",
"uid \n",
"12186644 Integromics: challenges in data integration. \n",
"27164162 Emergence of Biomolecular Pathways to Define N... \n",
"29339647 Proceedings of the 11th Congress of the Intern... \n",
"31394729 Novel Strategies for Cancer Treatment: Highlig... \n",
"\n",
" doi \n",
"uid \n",
"12186644 NaN \n",
"27164162 10.1165/rcmb.2016-0141PS \n",
"29339647 10.1159/000485799 \n",
"31394729 10.3390/cancers11081125 "
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"literature[literature['Is Congress'] == True][['title', 'doi']]"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title</th>\n",
" <th>doi</th>\n",
" </tr>\n",
" <tr>\n",
" <th>uid</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>29301609</th>\n",
" <td>Integration of metabolomics and transcriptomic...</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32475383</th>\n",
" <td>From genome sequencing to the discovery of pot...</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" title doi\n",
"uid \n",
"29301609 Integration of metabolomics and transcriptomic... NaN\n",
"32475383 From genome sequencing to the discovery of pot... NaN"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"literature[literature['Is News'] == True][['title', 'doi']]"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title</th>\n",
" <th>doi</th>\n",
" </tr>\n",
" <tr>\n",
" <th>uid</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>25632108</th>\n",
" <td>CyanOmics: an integrated database of omics for...</td>\n",
" <td>10.1093/database/bau127</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26130662</th>\n",
" <td>toxoMine: an integrated omics data warehouse f...</td>\n",
" <td>10.1093/database/bav066</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26646939</th>\n",
" <td>Multi-omic profiles of human non-alcoholic fat...</td>\n",
" <td>10.1038/sdata.2015.68</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27504011</th>\n",
" <td>MODEM: multi-omics data envelopment and mining...</td>\n",
" <td>10.1093/database/baw117</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29087370</th>\n",
" <td>Monitoring microbial responses to ocean deoxyg...</td>\n",
" <td>10.1038/sdata.2017.158</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30084846</th>\n",
" <td>A multi-omic atlas of the human frontal cortex...</td>\n",
" <td>10.1038/sdata.2018.142</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30204156</th>\n",
" <td>The Mount Sinai cohort of large-scale genomic,...</td>\n",
" <td>10.1038/sdata.2018.185</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30621600</th>\n",
" <td>The 1000IBD project: multi-omics data of 1000 ...</td>\n",
" <td>10.1186/s12876-018-0917-5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31201317</th>\n",
" <td>Multi omics analysis of fibrotic kidneys in tw...</td>\n",
" <td>10.1038/s41597-019-0095-5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" title \\\n",
"uid \n",
"25632108 CyanOmics: an integrated database of omics for... \n",
"26130662 toxoMine: an integrated omics data warehouse f... \n",
"26646939 Multi-omic profiles of human non-alcoholic fat... \n",
"27504011 MODEM: multi-omics data envelopment and mining... \n",
"29087370 Monitoring microbial responses to ocean deoxyg... \n",
"30084846 A multi-omic atlas of the human frontal cortex... \n",
"30204156 The Mount Sinai cohort of large-scale genomic,... \n",
"30621600 The 1000IBD project: multi-omics data of 1000 ... \n",
"31201317 Multi omics analysis of fibrotic kidneys in tw... \n",
"\n",
" doi \n",
"uid \n",
"25632108 10.1093/database/bau127 \n",
"26130662 10.1093/database/bav066 \n",
"26646939 10.1038/sdata.2015.68 \n",
"27504011 10.1093/database/baw117 \n",
"29087370 10.1038/sdata.2017.158 \n",
"30084846 10.1038/sdata.2018.142 \n",
"30204156 10.1038/sdata.2018.185 \n",
"30621600 10.1186/s12876-018-0917-5 \n",
"31201317 10.1038/s41597-019-0095-5 "
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"literature[literature['Is Dataset'] == True][['title', 'doi']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Journals"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"journal_freq = literature.journal.sorted_value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>journal</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Scientific reports</th>\n",
" <td>126</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Omics : a journal of integrative biology</th>\n",
" <td>78</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PloS one</th>\n",
" <td>69</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bioinformatics (Oxford, England)</th>\n",
" <td>68</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Nature communications</th>\n",
" <td>58</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Frontiers in genetics</th>\n",
" <td>55</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Journal of proteomics</th>\n",
" <td>53</td>\n",
" </tr>\n",
" <tr>\n",
" <th>BMC bioinformatics</th>\n",
" <td>52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Nucleic acids research</th>\n",
" <td>45</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Methods in molecular biology (Clifton, N.J.)</th>\n",
" <td>43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Journal of proteome research</th>\n",
" <td>42</td>\n",
" </tr>\n",
" <tr>\n",
" <th>BMC genomics</th>\n",
" <td>41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Oncotarget</th>\n",
" <td>39</td>\n",
" </tr>\n",
" <tr>\n",
" <th>International journal of molecular sciences</th>\n",
" <td>37</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Briefings in bioinformatics</th>\n",
" <td>36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Frontiers in microbiology</th>\n",
" <td>34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Molecular & cellular proteomics : MCP</th>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mSystems</th>\n",
" <td>28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Cell reports</th>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Metabolites</th>\n",
" <td>26</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" journal\n",
"index \n",
"Scientific reports 126\n",
"Omics : a journal of integrative biology 78\n",
"PloS one 69\n",
"Bioinformatics (Oxford, England) 68\n",
"Nature communications 58\n",
"Frontiers in genetics 55\n",
"Journal of proteomics 53\n",
"BMC bioinformatics 52\n",
"Nucleic acids research 45\n",
"Methods in molecular biology (Clifton, N.J.) 43\n",
"Journal of proteome research 42\n",
"BMC genomics 41\n",
"Oncotarget 39\n",
"International journal of molecular sciences 37\n",
"Briefings in bioinformatics 36\n",
"Frontiers in microbiology 34\n",
"Molecular & cellular proteomics : MCP 29\n",
"mSystems 28\n",
"Cell reports 26\n",
"Metabolites 26"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journal_freq.head(20).to_frame()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Sanity check (is any of the top names not unique?) - the numbers should be same if counting by ISSN:"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"index\n",
"2045-2322 126\n",
"1557-8100 77\n",
"1932-6203 69\n",
"1367-4811 68\n",
"2041-1723 58\n",
"1664-8021 55\n",
"1876-7737 53\n",
"1471-2105 52\n",
"1362-4962 45\n",
"1940-6029 42\n",
"Name: journal_issn, dtype: int64"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"literature.journal_issn.sorted_value_counts().head(10)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"index\n",
"Nucleic acids research 45\n",
"Methods in molecular biology (Clifton, N.J.) 43\n",
"Journal of proteome research 42\n",
"BMC genomics 41\n",
"Oncotarget 39\n",
" ..\n",
"Zhongguo yi xue ke xue yuan xue bao. Acta Academiae Medicinae Sinicae 1\n",
"Zhonghua nan ke xue = National journal of andrology 1\n",
"Zhonghua yu fang yi xue za zhi [Chinese journal of preventive medicine] 1\n",
"Zoology (Jena, Germany) 1\n",
"mSphere 1\n",
"Name: journal, Length: 967, dtype: int64"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journal_freq[journal_freq < 50]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"literature = literature.replace({float('nan'): None}).infer_objects()\n",
"%R -i literature"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Publication types"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"%%R\n",
"library(ComplexUpset)\n",
"source('helpers/plots.R')\n",
"source('helpers/colors.R')"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"publication_types_list = ['Is ' + t for t in publication_types['0'].sorted_value_counts().where(lambda x: x > 10).dropna().index]"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"R[write to console]: Scale for 'y' is already present. Adding another scale for 'y', which will\n",
"replace the existing scale.\n",
"\n"
]
},
{
"data": {
"image/png": "\n"
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%R -i publication_types_list -w 2000 -r 100 -h 800\n",
"\n",
"upset(\n",
" literature,\n",
" publication_types_list,\n",
" base_annotations=list(\n",
" 'Intersection size'=intersection_size(\n",
" text=list(angle=90, vjust=0.5, hjust=0)\n",
" )\n",
" ),\n",
" width_ratio=0.1,\n",
" set_sizes=(\n",
" upset_set_size(\n",
" geom=geom_bar(width=0.5)\n",
" )\n",
" + scale_y_continuous(trans=reverse_log_trans())\n",
" + theme(axis.text.x=element_text(angle=90))\n",
" )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n"
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%R -w 700 -h 400 -r 100\n",
"(\n",
" ggplot(literature, aes(x=year, fill=has_doi))\n",
" + geom_bar()\n",
" + theme_bw()\n",
") + (\n",
" ggplot(literature, aes(x=year, fill=has_pmc))\n",
" + geom_bar()\n",
" + theme_bw()\n",
") & plot_layout(ncol=1)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}