[1bd6b5]: / notebooks / Exploration.ipynb

Download this file

2901 lines (2900 with data), 266.3 kB

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Aims**:\n",
    " - list high-impact works to aid navigation of the field\n",
    " - check for unexpectedly common authors/affiliations/journals to screening for potential false-positive matches (see the Integromics and Panomics companies)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%run notebook_setup.ipynb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "Imported:\n",
       "\n",
       " - `literature` (904B0F94)\n",
       " - `affiliations` (E06399F2)\n",
       " - `authors` (DC49BC74)\n",
       " - `publication_types` (7DD4E741)\n",
       "\n",
       "at Wednesday, 05. Aug 2020 16:22"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {
      "text/markdown": {
       "action": "import",
       "command": "from pubmed_derived_data import literature, affiliations, authors, publication_types",
       "finished": "2020-08-05T16:22:34.123010",
       "finished_human_readable": "Wednesday, 05. Aug 2020 16:22",
       "result": [
        {
         "new_file": {
          "crc32": "904B0F94",
          "sha256": "A2EFC068A287A3B724AE4B320EE5356E1E99474BD08A2E2A3EBA34CD0194F23B"
         },
         "subject": "literature"
        },
        {
         "new_file": {
          "crc32": "E06399F2",
          "sha256": "8DD13D4B7CF3D2E314BBC4E051AEDBF21414371F42BB4D100D7721B5F4D24E60"
         },
         "subject": "affiliations"
        },
        {
         "new_file": {
          "crc32": "DC49BC74",
          "sha256": "237BEFD0FDA68E2A155B9EC00519017B4C9BC92BD2AA3D10E058A013EC0DE1D9"
         },
         "subject": "authors"
        },
        {
         "new_file": {
          "crc32": "7DD4E741",
          "sha256": "BD0EBF88B38BB9E0E44923E2CB473A532AEFBFFC6A7FCC02926290CAD2615150"
         },
         "subject": "publication_types"
        }
       ],
       "started": "2020-08-05T16:22:30.319280"
      }
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "%vault from pubmed_derived_data import literature, affiliations, authors, publication_types"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "Imported:\n",
       "\n",
       " - `web_of_science_journals` (E95CE31E)\n",
       " - `scimago_by_issn` (DDCBFB24)\n",
       "\n",
       "at Wednesday, 05. Aug 2020 16:22"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {
      "text/markdown": {
       "action": "import",
       "command": "from journals_data import web_of_science_journals, scimago_by_issn",
       "finished": "2020-08-05T16:22:36.022687",
       "finished_human_readable": "Wednesday, 05. Aug 2020 16:22",
       "result": [
        {
         "new_file": {
          "crc32": "E95CE31E",
          "sha256": "55F51248C28FEEC07B4E5A98AD3660519AD3566DC9B61985279E6D4C9B374BF8"
         },
         "subject": "web_of_science_journals"
        },
        {
         "new_file": {
          "crc32": "DDCBFB24",
          "sha256": "B16E18A78F3247A03950A39AB7B64E92EAFA747074BB6B2DBFEBDA7DCA5902D3"
         },
         "subject": "scimago_by_issn"
        }
       ],
       "started": "2020-08-05T16:22:34.145419"
      }
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "%vault from journals_data import web_of_science_journals, scimago_by_issn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "literature['journal_sjr_rank'] = (\n",
    "    literature['journal_issn']\n",
    "    .str.replace('-', '')\n",
    "    .fillna('-')\n",
    "    .apply(\n",
    "        lambda x: (\n",
    "            scimago_by_issn.loc[x].Rank\n",
    "            if x in scimago_by_issn.index else\n",
    "            None\n",
    "        )\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## A quick overview/hot-takes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "columns_to_show = ['title', 'journal', 'doi', 'journal_sjr_rank']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def display_sorted(data):\n",
    "    return data[columns_to_show].sort_values(['journal_sjr_rank', 'title'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Benchmarks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>journal</th>\n",
       "      <th>doi</th>\n",
       "      <th>journal_sjr_rank</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>uid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>30295871</th>\n",
       "      <td>Multi-omic and multi-view clustering algorithm...</td>\n",
       "      <td>Nucleic acids research</td>\n",
       "      <td>10.1093/nar/gky889</td>\n",
       "      <td>90.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30496480</th>\n",
       "      <td>Multi-omic and multi-view clustering algorithm...</td>\n",
       "      <td>Nucleic acids research</td>\n",
       "      <td>10.1093/nar/gky1226</td>\n",
       "      <td>90.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22121217</th>\n",
       "      <td>The Stem Cell Discovery Engine: an integrated ...</td>\n",
       "      <td>Nucleic acids research</td>\n",
       "      <td>10.1093/nar/gkr1051</td>\n",
       "      <td>90.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32234303</th>\n",
       "      <td>Multiomics Evaluation of Gastrointestinal and ...</td>\n",
       "      <td>Gastroenterology</td>\n",
       "      <td>10.1053/j.gastro.2020.03.045</td>\n",
       "      <td>169.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31154149</th>\n",
       "      <td>Quantitative CMR population imaging on 20,000 ...</td>\n",
       "      <td>Medical image analysis</td>\n",
       "      <td>10.1016/j.media.2019.05.006</td>\n",
       "      <td>409.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30068331</th>\n",
       "      <td>Species comparison of liver proteomes reveals ...</td>\n",
       "      <td>BMC biology</td>\n",
       "      <td>10.1186/s12915-018-0547-y</td>\n",
       "      <td>447.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32437529</th>\n",
       "      <td>Integrating multi-OMICS data through sparse Ca...</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "      <td>10.1093/bioinformatics/btaa530</td>\n",
       "      <td>484.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31792509</th>\n",
       "      <td>Clustering and variable selection evaluation o...</td>\n",
       "      <td>Briefings in bioinformatics</td>\n",
       "      <td>10.1093/bib/bbz138</td>\n",
       "      <td>625.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29688321</th>\n",
       "      <td>Comparison and evaluation of integrative metho...</td>\n",
       "      <td>Briefings in bioinformatics</td>\n",
       "      <td>10.1093/bib/bby027</td>\n",
       "      <td>625.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31220206</th>\n",
       "      <td>Evaluation of integrative clustering methods f...</td>\n",
       "      <td>Briefings in bioinformatics</td>\n",
       "      <td>10.1093/bib/bbz015</td>\n",
       "      <td>625.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29272335</th>\n",
       "      <td>Multi-omics integration-a comparison of unsupe...</td>\n",
       "      <td>Briefings in bioinformatics</td>\n",
       "      <td>10.1093/bib/bbx167</td>\n",
       "      <td>625.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30368064</th>\n",
       "      <td>Multi-omics at single-cell resolution: compari...</td>\n",
       "      <td>Current opinion in biotechnology</td>\n",
       "      <td>10.1016/j.copbio.2018.09.012</td>\n",
       "      <td>740.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32255618</th>\n",
       "      <td>Evaluation of Microbiome-Host Relationships in...</td>\n",
       "      <td>Environmental science &amp; technology</td>\n",
       "      <td>10.1021/acs.est.0c00628</td>\n",
       "      <td>797.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25414848</th>\n",
       "      <td>Multi-omic landscape of rheumatoid arthritis: ...</td>\n",
       "      <td>Frontiers in cell and developmental biology</td>\n",
       "      <td>10.3389/fcell.2014.00059</td>\n",
       "      <td>862.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31292535</th>\n",
       "      <td>Multi-omic molecular comparison of primary ver...</td>\n",
       "      <td>British journal of cancer</td>\n",
       "      <td>10.1038/s41416-019-0507-5</td>\n",
       "      <td>941.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30866779</th>\n",
       "      <td>Multi-omics comparisons of p-aminosalicylic ac...</td>\n",
       "      <td>Emerging microbes &amp; infections</td>\n",
       "      <td>10.1080/22221751.2019.1568179</td>\n",
       "      <td>1103.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22954204</th>\n",
       "      <td>Systematic comparison of reverse phase and hyd...</td>\n",
       "      <td>Analytical chemistry</td>\n",
       "      <td>10.1021/ac3012494</td>\n",
       "      <td>1178.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24216987</th>\n",
       "      <td>Metabolomic Dynamic Analysis of Hypoxia in MDA...</td>\n",
       "      <td>Cancers</td>\n",
       "      <td>10.3390/cancers5020491</td>\n",
       "      <td>1388.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29936248</th>\n",
       "      <td>U-BIOPRED: evaluation of the value of a public...</td>\n",
       "      <td>Drug discovery today</td>\n",
       "      <td>10.1016/j.drudis.2018.06.015</td>\n",
       "      <td>1444.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31312416</th>\n",
       "      <td>An Evaluation of Machine Learning Approaches f...</td>\n",
       "      <td>Computational and structural biotechnology jou...</td>\n",
       "      <td>10.1016/j.csbj.2019.05.008</td>\n",
       "      <td>1637.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29213276</th>\n",
       "      <td>An Integrated \"Multi-Omics\" Comparison of Embr...</td>\n",
       "      <td>Frontiers in plant science</td>\n",
       "      <td>10.3389/fpls.2017.01984</td>\n",
       "      <td>1794.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29740416</th>\n",
       "      <td>SplinectomeR Enables Group Comparisons in Long...</td>\n",
       "      <td>Frontiers in microbiology</td>\n",
       "      <td>10.3389/fmicb.2018.00785</td>\n",
       "      <td>1798.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29920461</th>\n",
       "      <td>Multi 'omics comparison reveals metabolome bio...</td>\n",
       "      <td>The Science of the total environment</td>\n",
       "      <td>10.1016/j.scitotenv.2018.05.256</td>\n",
       "      <td>1854.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29212468</th>\n",
       "      <td>A comparison of graph- and kernel-based -omics...</td>\n",
       "      <td>BMC bioinformatics</td>\n",
       "      <td>10.1186/s12859-017-1982-4</td>\n",
       "      <td>1930.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30234027</th>\n",
       "      <td>Comparison of Fecal Collection Methods for Mic...</td>\n",
       "      <td>Frontiers in cellular and infection microbiology</td>\n",
       "      <td>10.3389/fcimb.2018.00301</td>\n",
       "      <td>1931.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31792061</th>\n",
       "      <td>Evaluation of colorectal cancer subtypes and c...</td>\n",
       "      <td>Life science alliance</td>\n",
       "      <td>10.26508/lsa.201900517</td>\n",
       "      <td>2050.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31649733</th>\n",
       "      <td>CEPICS: A Comparison and Evaluation Platform f...</td>\n",
       "      <td>Frontiers in genetics</td>\n",
       "      <td>10.3389/fgene.2019.00966</td>\n",
       "      <td>2313.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27929400</th>\n",
       "      <td>A Systematic Evaluation of Blood Serum and Pla...</td>\n",
       "      <td>International journal of molecular sciences</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2808.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32386347</th>\n",
       "      <td>Comparison of Proteomic Assessment Methods in ...</td>\n",
       "      <td>Proteomics</td>\n",
       "      <td>10.1002/pmic.201900278</td>\n",
       "      <td>2944.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22796353</th>\n",
       "      <td>Effects of pre-storage leukoreduction on store...</td>\n",
       "      <td>Journal of proteomics</td>\n",
       "      <td>10.1016/j.jprot.2012.06.032</td>\n",
       "      <td>3300.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32252356</th>\n",
       "      <td>Deep Functional Profiling Facilitates the Eval...</td>\n",
       "      <td>Antibiotics (Basel, Switzerland)</td>\n",
       "      <td>10.3390/antibiotics9040157</td>\n",
       "      <td>3371.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30596412</th>\n",
       "      <td>Growth Performance and Meat Quality Evaluation...</td>\n",
       "      <td>Journal of agricultural and food chemistry</td>\n",
       "      <td>10.1021/acs.jafc.8b05477</td>\n",
       "      <td>3803.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28927277</th>\n",
       "      <td>Benchmark Dose Modeling Estimates of the Conce...</td>\n",
       "      <td>Chemical research in toxicology</td>\n",
       "      <td>10.1021/acs.chemrestox.7b00221</td>\n",
       "      <td>4330.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31942259</th>\n",
       "      <td>A multiomics comparison between endometrial ca...</td>\n",
       "      <td>PeerJ</td>\n",
       "      <td>10.7717/peerj.8347</td>\n",
       "      <td>4381.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28804556</th>\n",
       "      <td>Anti-tumor efficacy evaluation of a novel mono...</td>\n",
       "      <td>American journal of translational research</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4480.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21081137</th>\n",
       "      <td>Cross-study and cross-omics comparisons of thr...</td>\n",
       "      <td>Toxicology and applied pharmacology</td>\n",
       "      <td>10.1016/j.taap.2010.11.006</td>\n",
       "      <td>4714.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28546903</th>\n",
       "      <td>Comprehensive reconstruction and evaluation of...</td>\n",
       "      <td>Bioresources and bioprocessing</td>\n",
       "      <td>10.1186/s40643-017-0152-x</td>\n",
       "      <td>5398.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26495307</th>\n",
       "      <td>Cross-omics comparison of stress responses in ...</td>\n",
       "      <td>BioMed research international</td>\n",
       "      <td>10.1155/2015/628158</td>\n",
       "      <td>7157.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28521615</th>\n",
       "      <td>Comprehensive Evaluation of (+)-Usnic Acid-ind...</td>\n",
       "      <td>Toxicologic pathology</td>\n",
       "      <td>10.1177/0192623317707074</td>\n",
       "      <td>7826.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17785938</th>\n",
       "      <td>Evaluation of human hepatocyte chimeric mice a...</td>\n",
       "      <td>The Journal of toxicological sciences</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8472.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31486672</th>\n",
       "      <td>: A Novel Bayesian Network Structural Learning...</td>\n",
       "      <td>Journal of computational biology : a journal o...</td>\n",
       "      <td>10.1089/cmb.2019.0210</td>\n",
       "      <td>8932.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28994524</th>\n",
       "      <td>[Clinical value evaluation of Chinese herbal f...</td>\n",
       "      <td>Zhongguo Zhong yao za zhi = Zhongguo zhongyao ...</td>\n",
       "      <td>10.19540/j.cnki.cjcmm.20170103.001</td>\n",
       "      <td>21244.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21765119</th>\n",
       "      <td>A comparison of the cyclic variation in serum ...</td>\n",
       "      <td>Biological research for nursing</td>\n",
       "      <td>10.1177/1099800411412766</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24598031</th>\n",
       "      <td>A cross-omics toxicological evaluation of drin...</td>\n",
       "      <td>Journal of hazardous materials</td>\n",
       "      <td>10.1016/j.jhazmat.2014.02.007</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31344359</th>\n",
       "      <td>Before and After: Comparison of Legacy and Har...</td>\n",
       "      <td>Cell systems</td>\n",
       "      <td>10.1016/j.cels.2019.06.006</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32585153</th>\n",
       "      <td>Evaluation of Bunina et al.: Synthesizing Mult...</td>\n",
       "      <td>Cell systems</td>\n",
       "      <td>10.1016/j.cels.2020.06.002</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32156385</th>\n",
       "      <td>Foodomics evaluation of the anti-proliferative...</td>\n",
       "      <td>Food research international (Ottawa, Ont.)</td>\n",
       "      <td>10.1016/j.foodres.2019.108938</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32117598</th>\n",
       "      <td>Multiomics Evaluation of Human Fat-Derived Mes...</td>\n",
       "      <td>BioResearch open access</td>\n",
       "      <td>10.1089/biores.2020.0005</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      title  \\\n",
       "uid                                                           \n",
       "30295871  Multi-omic and multi-view clustering algorithm...   \n",
       "30496480  Multi-omic and multi-view clustering algorithm...   \n",
       "22121217  The Stem Cell Discovery Engine: an integrated ...   \n",
       "32234303  Multiomics Evaluation of Gastrointestinal and ...   \n",
       "31154149  Quantitative CMR population imaging on 20,000 ...   \n",
       "30068331  Species comparison of liver proteomes reveals ...   \n",
       "32437529  Integrating multi-OMICS data through sparse Ca...   \n",
       "31792509  Clustering and variable selection evaluation o...   \n",
       "29688321  Comparison and evaluation of integrative metho...   \n",
       "31220206  Evaluation of integrative clustering methods f...   \n",
       "29272335  Multi-omics integration-a comparison of unsupe...   \n",
       "30368064  Multi-omics at single-cell resolution: compari...   \n",
       "32255618  Evaluation of Microbiome-Host Relationships in...   \n",
       "25414848  Multi-omic landscape of rheumatoid arthritis: ...   \n",
       "31292535  Multi-omic molecular comparison of primary ver...   \n",
       "30866779  Multi-omics comparisons of p-aminosalicylic ac...   \n",
       "22954204  Systematic comparison of reverse phase and hyd...   \n",
       "24216987  Metabolomic Dynamic Analysis of Hypoxia in MDA...   \n",
       "29936248  U-BIOPRED: evaluation of the value of a public...   \n",
       "31312416  An Evaluation of Machine Learning Approaches f...   \n",
       "29213276  An Integrated \"Multi-Omics\" Comparison of Embr...   \n",
       "29740416  SplinectomeR Enables Group Comparisons in Long...   \n",
       "29920461  Multi 'omics comparison reveals metabolome bio...   \n",
       "29212468  A comparison of graph- and kernel-based -omics...   \n",
       "30234027  Comparison of Fecal Collection Methods for Mic...   \n",
       "31792061  Evaluation of colorectal cancer subtypes and c...   \n",
       "31649733  CEPICS: A Comparison and Evaluation Platform f...   \n",
       "27929400  A Systematic Evaluation of Blood Serum and Pla...   \n",
       "32386347  Comparison of Proteomic Assessment Methods in ...   \n",
       "22796353  Effects of pre-storage leukoreduction on store...   \n",
       "32252356  Deep Functional Profiling Facilitates the Eval...   \n",
       "30596412  Growth Performance and Meat Quality Evaluation...   \n",
       "28927277  Benchmark Dose Modeling Estimates of the Conce...   \n",
       "31942259  A multiomics comparison between endometrial ca...   \n",
       "28804556  Anti-tumor efficacy evaluation of a novel mono...   \n",
       "21081137  Cross-study and cross-omics comparisons of thr...   \n",
       "28546903  Comprehensive reconstruction and evaluation of...   \n",
       "26495307  Cross-omics comparison of stress responses in ...   \n",
       "28521615  Comprehensive Evaluation of (+)-Usnic Acid-ind...   \n",
       "17785938  Evaluation of human hepatocyte chimeric mice a...   \n",
       "31486672  : A Novel Bayesian Network Structural Learning...   \n",
       "28994524  [Clinical value evaluation of Chinese herbal f...   \n",
       "21765119  A comparison of the cyclic variation in serum ...   \n",
       "24598031  A cross-omics toxicological evaluation of drin...   \n",
       "31344359  Before and After: Comparison of Legacy and Har...   \n",
       "32585153  Evaluation of Bunina et al.: Synthesizing Mult...   \n",
       "32156385  Foodomics evaluation of the anti-proliferative...   \n",
       "32117598  Multiomics Evaluation of Human Fat-Derived Mes...   \n",
       "\n",
       "                                                    journal  \\\n",
       "uid                                                           \n",
       "30295871                             Nucleic acids research   \n",
       "30496480                             Nucleic acids research   \n",
       "22121217                             Nucleic acids research   \n",
       "32234303                                   Gastroenterology   \n",
       "31154149                             Medical image analysis   \n",
       "30068331                                        BMC biology   \n",
       "32437529                   Bioinformatics (Oxford, England)   \n",
       "31792509                        Briefings in bioinformatics   \n",
       "29688321                        Briefings in bioinformatics   \n",
       "31220206                        Briefings in bioinformatics   \n",
       "29272335                        Briefings in bioinformatics   \n",
       "30368064                   Current opinion in biotechnology   \n",
       "32255618                 Environmental science & technology   \n",
       "25414848        Frontiers in cell and developmental biology   \n",
       "31292535                          British journal of cancer   \n",
       "30866779                     Emerging microbes & infections   \n",
       "22954204                               Analytical chemistry   \n",
       "24216987                                            Cancers   \n",
       "29936248                               Drug discovery today   \n",
       "31312416  Computational and structural biotechnology jou...   \n",
       "29213276                         Frontiers in plant science   \n",
       "29740416                          Frontiers in microbiology   \n",
       "29920461               The Science of the total environment   \n",
       "29212468                                 BMC bioinformatics   \n",
       "30234027   Frontiers in cellular and infection microbiology   \n",
       "31792061                              Life science alliance   \n",
       "31649733                              Frontiers in genetics   \n",
       "27929400        International journal of molecular sciences   \n",
       "32386347                                         Proteomics   \n",
       "22796353                              Journal of proteomics   \n",
       "32252356                   Antibiotics (Basel, Switzerland)   \n",
       "30596412         Journal of agricultural and food chemistry   \n",
       "28927277                    Chemical research in toxicology   \n",
       "31942259                                              PeerJ   \n",
       "28804556         American journal of translational research   \n",
       "21081137                Toxicology and applied pharmacology   \n",
       "28546903                     Bioresources and bioprocessing   \n",
       "26495307                      BioMed research international   \n",
       "28521615                              Toxicologic pathology   \n",
       "17785938              The Journal of toxicological sciences   \n",
       "31486672  Journal of computational biology : a journal o...   \n",
       "28994524  Zhongguo Zhong yao za zhi = Zhongguo zhongyao ...   \n",
       "21765119                    Biological research for nursing   \n",
       "24598031                     Journal of hazardous materials   \n",
       "31344359                                       Cell systems   \n",
       "32585153                                       Cell systems   \n",
       "32156385         Food research international (Ottawa, Ont.)   \n",
       "32117598                            BioResearch open access   \n",
       "\n",
       "                                         doi  journal_sjr_rank  \n",
       "uid                                                             \n",
       "30295871                  10.1093/nar/gky889              90.0  \n",
       "30496480                 10.1093/nar/gky1226              90.0  \n",
       "22121217                 10.1093/nar/gkr1051              90.0  \n",
       "32234303        10.1053/j.gastro.2020.03.045             169.0  \n",
       "31154149         10.1016/j.media.2019.05.006             409.0  \n",
       "30068331           10.1186/s12915-018-0547-y             447.0  \n",
       "32437529      10.1093/bioinformatics/btaa530             484.0  \n",
       "31792509                  10.1093/bib/bbz138             625.0  \n",
       "29688321                  10.1093/bib/bby027             625.0  \n",
       "31220206                  10.1093/bib/bbz015             625.0  \n",
       "29272335                  10.1093/bib/bbx167             625.0  \n",
       "30368064        10.1016/j.copbio.2018.09.012             740.0  \n",
       "32255618             10.1021/acs.est.0c00628             797.0  \n",
       "25414848            10.3389/fcell.2014.00059             862.0  \n",
       "31292535           10.1038/s41416-019-0507-5             941.0  \n",
       "30866779       10.1080/22221751.2019.1568179            1103.0  \n",
       "22954204                   10.1021/ac3012494            1178.0  \n",
       "24216987              10.3390/cancers5020491            1388.0  \n",
       "29936248        10.1016/j.drudis.2018.06.015            1444.0  \n",
       "31312416          10.1016/j.csbj.2019.05.008            1637.0  \n",
       "29213276             10.3389/fpls.2017.01984            1794.0  \n",
       "29740416            10.3389/fmicb.2018.00785            1798.0  \n",
       "29920461     10.1016/j.scitotenv.2018.05.256            1854.0  \n",
       "29212468           10.1186/s12859-017-1982-4            1930.0  \n",
       "30234027            10.3389/fcimb.2018.00301            1931.0  \n",
       "31792061              10.26508/lsa.201900517            2050.0  \n",
       "31649733            10.3389/fgene.2019.00966            2313.0  \n",
       "27929400                                 NaN            2808.0  \n",
       "32386347              10.1002/pmic.201900278            2944.0  \n",
       "22796353         10.1016/j.jprot.2012.06.032            3300.0  \n",
       "32252356          10.3390/antibiotics9040157            3371.0  \n",
       "30596412            10.1021/acs.jafc.8b05477            3803.0  \n",
       "28927277      10.1021/acs.chemrestox.7b00221            4330.0  \n",
       "31942259                  10.7717/peerj.8347            4381.0  \n",
       "28804556                                 NaN            4480.0  \n",
       "21081137          10.1016/j.taap.2010.11.006            4714.0  \n",
       "28546903           10.1186/s40643-017-0152-x            5398.0  \n",
       "26495307                 10.1155/2015/628158            7157.0  \n",
       "28521615            10.1177/0192623317707074            7826.0  \n",
       "17785938                                 NaN            8472.0  \n",
       "31486672               10.1089/cmb.2019.0210            8932.0  \n",
       "28994524  10.19540/j.cnki.cjcmm.20170103.001           21244.0  \n",
       "21765119            10.1177/1099800411412766               NaN  \n",
       "24598031       10.1016/j.jhazmat.2014.02.007               NaN  \n",
       "31344359          10.1016/j.cels.2019.06.006               NaN  \n",
       "32585153          10.1016/j.cels.2020.06.002               NaN  \n",
       "32156385       10.1016/j.foodres.2019.108938               NaN  \n",
       "32117598            10.1089/biores.2020.0005               NaN  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "literature[literature.title.str.lower().str.contains('|'.join(['benchmark', 'evaluation', 'comparison']))].pipe(display_sorted)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Biomarkers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>journal</th>\n",
       "      <th>doi</th>\n",
       "      <th>journal_sjr_rank</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>uid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>31563876</th>\n",
       "      <td>Exploiting differential Wnt target gene expres...</td>\n",
       "      <td>Gut</td>\n",
       "      <td>10.1136/gutjnl-2019-319126</td>\n",
       "      <td>121.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28838933</th>\n",
       "      <td>Epigenome-Wide Association Study Identifies Ca...</td>\n",
       "      <td>Circulation</td>\n",
       "      <td>10.1161/CIRCULATIONAHA.117.027355</td>\n",
       "      <td>142.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24859455</th>\n",
       "      <td>Identification of prognostic biomarkers in hep...</td>\n",
       "      <td>Journal of hepatology</td>\n",
       "      <td>10.1016/j.jhep.2014.05.025</td>\n",
       "      <td>171.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31501510</th>\n",
       "      <td>Multi-omic biomarker identification and valida...</td>\n",
       "      <td>Molecular psychiatry</td>\n",
       "      <td>10.1038/s41380-019-0496-z</td>\n",
       "      <td>187.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31312043</th>\n",
       "      <td>Multi-omics in IBD biomarker discovery: the mi...</td>\n",
       "      <td>Nature reviews. Gastroenterology &amp; hepatology</td>\n",
       "      <td>10.1038/s41575-019-0188-9</td>\n",
       "      <td>196.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29764059</th>\n",
       "      <td>Leveraging next-generation phenotyping and pan...</td>\n",
       "      <td>Personalized medicine</td>\n",
       "      <td>10.2217/pme.14.6</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31882086</th>\n",
       "      <td>Meta-proteomics for the discovery of protein b...</td>\n",
       "      <td>Food research international (Ottawa, Ont.)</td>\n",
       "      <td>10.1016/j.foodres.2019.108739</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29781548</th>\n",
       "      <td>Multi-omics in high-grade serous ovarian cance...</td>\n",
       "      <td>American journal of reproductive immunology (N...</td>\n",
       "      <td>10.1111/aji.12975</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29686467</th>\n",
       "      <td>Multiomics biomarkers for the prediction of no...</td>\n",
       "      <td>World journal of gastroenterology</td>\n",
       "      <td>10.3748/wjg.v24.i15.1601</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26312246</th>\n",
       "      <td>Wading through the noise of \"multi-omics\" to i...</td>\n",
       "      <td>Hepatobiliary surgery and nutrition</td>\n",
       "      <td>10.3978/j.issn.2304-3881.2015.04.05</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>141 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      title  \\\n",
       "uid                                                           \n",
       "31563876  Exploiting differential Wnt target gene expres...   \n",
       "28838933  Epigenome-Wide Association Study Identifies Ca...   \n",
       "24859455  Identification of prognostic biomarkers in hep...   \n",
       "31501510  Multi-omic biomarker identification and valida...   \n",
       "31312043  Multi-omics in IBD biomarker discovery: the mi...   \n",
       "...                                                     ...   \n",
       "29764059  Leveraging next-generation phenotyping and pan...   \n",
       "31882086  Meta-proteomics for the discovery of protein b...   \n",
       "29781548  Multi-omics in high-grade serous ovarian cance...   \n",
       "29686467  Multiomics biomarkers for the prediction of no...   \n",
       "26312246  Wading through the noise of \"multi-omics\" to i...   \n",
       "\n",
       "                                                    journal  \\\n",
       "uid                                                           \n",
       "31563876                                                Gut   \n",
       "28838933                                        Circulation   \n",
       "24859455                              Journal of hepatology   \n",
       "31501510                               Molecular psychiatry   \n",
       "31312043      Nature reviews. Gastroenterology & hepatology   \n",
       "...                                                     ...   \n",
       "29764059                              Personalized medicine   \n",
       "31882086         Food research international (Ottawa, Ont.)   \n",
       "29781548  American journal of reproductive immunology (N...   \n",
       "29686467                  World journal of gastroenterology   \n",
       "26312246                Hepatobiliary surgery and nutrition   \n",
       "\n",
       "                                          doi  journal_sjr_rank  \n",
       "uid                                                              \n",
       "31563876           10.1136/gutjnl-2019-319126             121.0  \n",
       "28838933    10.1161/CIRCULATIONAHA.117.027355             142.0  \n",
       "24859455           10.1016/j.jhep.2014.05.025             171.0  \n",
       "31501510            10.1038/s41380-019-0496-z             187.0  \n",
       "31312043            10.1038/s41575-019-0188-9             196.0  \n",
       "...                                       ...               ...  \n",
       "29764059                     10.2217/pme.14.6               NaN  \n",
       "31882086        10.1016/j.foodres.2019.108739               NaN  \n",
       "29781548                    10.1111/aji.12975               NaN  \n",
       "29686467             10.3748/wjg.v24.i15.1601               NaN  \n",
       "26312246  10.3978/j.issn.2304-3881.2015.04.05               NaN  \n",
       "\n",
       "[141 rows x 4 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Titles mentioning 'biomarker' (case-insensitive: titles are lower-cased first).\n",
    "# na=False treats a missing title as a non-match so the mask stays purely boolean.\n",
    "literature[literature.title.str.lower().str.contains('biomarker', na=False)].pipe(display_sorted)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>journal</th>\n",
       "      <th>doi</th>\n",
       "      <th>journal_sjr_rank</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>uid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>29479082</th>\n",
       "      <td>Integrative omics for health and disease.</td>\n",
       "      <td>Nature reviews. Genetics</td>\n",
       "      <td>10.1038/nrg.2018.4</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31925408</th>\n",
       "      <td>Multi-omics shows the (default) way.</td>\n",
       "      <td>Nature reviews. Genetics</td>\n",
       "      <td>10.1038/s41576-020-0211-6</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28970588</th>\n",
       "      <td>Technique: CRISPR CAPTURE for multi-omic probi...</td>\n",
       "      <td>Nature reviews. Genetics</td>\n",
       "      <td>10.1038/nrg.2017.79</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31491384</th>\n",
       "      <td>Antibiotics-Driven Gut Microbiome Perturbation...</td>\n",
       "      <td>Cell</td>\n",
       "      <td>10.1016/j.cell.2019.08.010</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29100071</th>\n",
       "      <td>Cancer Evolution during Immunotherapy.</td>\n",
       "      <td>Cell</td>\n",
       "      <td>10.1016/j.cell.2017.10.027</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30595449</th>\n",
       "      <td>Gene Regulatory Programs Conferring Phenotypic...</td>\n",
       "      <td>Cell</td>\n",
       "      <td>10.1016/j.cell.2018.11.045</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27863251</th>\n",
       "      <td>Genetic Drivers of Epigenetic and Transcriptio...</td>\n",
       "      <td>Cell</td>\n",
       "      <td>10.1016/j.cell.2016.10.026</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31585088</th>\n",
       "      <td>Integrated Proteogenomic Characterization of H...</td>\n",
       "      <td>Cell</td>\n",
       "      <td>10.1016/j.cell.2019.08.052</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32059783</th>\n",
       "      <td>Lymphoma Driver Mutations in the Pathogenic Ev...</td>\n",
       "      <td>Cell</td>\n",
       "      <td>10.1016/j.cell.2020.01.029</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27912059</th>\n",
       "      <td>Microbiota Diurnal Rhythmicity Programs Host T...</td>\n",
       "      <td>Cell</td>\n",
       "      <td>10.1016/j.cell.2016.11.003</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32470399</th>\n",
       "      <td>Molecular Choreography of Acute Exercise.</td>\n",
       "      <td>Cell</td>\n",
       "      <td>10.1016/j.cell.2020.04.043</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32589957</th>\n",
       "      <td>Molecular Transducers of Physical Activity Con...</td>\n",
       "      <td>Cell</td>\n",
       "      <td>10.1016/j.cell.2020.06.004</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32579974</th>\n",
       "      <td>Multimodal Analysis of Composition and Spatial...</td>\n",
       "      <td>Cell</td>\n",
       "      <td>10.1016/j.cell.2020.05.039</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30193112</th>\n",
       "      <td>Personalized Gut Mucosal Colonization Resistan...</td>\n",
       "      <td>Cell</td>\n",
       "      <td>10.1016/j.cell.2018.08.041</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32649874</th>\n",
       "      <td>Proteogenomic Characterization Reveals Therape...</td>\n",
       "      <td>Cell</td>\n",
       "      <td>10.1016/j.cell.2020.06.013</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32059776</th>\n",
       "      <td>Proteogenomic Characterization of Endometrial ...</td>\n",
       "      <td>Cell</td>\n",
       "      <td>10.1016/j.cell.2020.01.026</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32649875</th>\n",
       "      <td>Proteogenomics of Non-smoking Lung Cancer in E...</td>\n",
       "      <td>Cell</td>\n",
       "      <td>10.1016/j.cell.2020.06.012</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29677503</th>\n",
       "      <td>Revolutionizing Precision Oncology through Col...</td>\n",
       "      <td>Cell</td>\n",
       "      <td>10.1016/j.cell.2018.04.008</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29328914</th>\n",
       "      <td>Rewiring of the Fruit Metabolome in Tomato Bre...</td>\n",
       "      <td>Cell</td>\n",
       "      <td>10.1016/j.cell.2017.12.019</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31178122</th>\n",
       "      <td>Single-Cell Multi-omic Integration Compares an...</td>\n",
       "      <td>Cell</td>\n",
       "      <td>10.1016/j.cell.2019.05.006</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      title  \\\n",
       "uid                                                           \n",
       "29479082          Integrative omics for health and disease.   \n",
       "31925408               Multi-omics shows the (default) way.   \n",
       "28970588  Technique: CRISPR CAPTURE for multi-omic probi...   \n",
       "31491384  Antibiotics-Driven Gut Microbiome Perturbation...   \n",
       "29100071             Cancer Evolution during Immunotherapy.   \n",
       "30595449  Gene Regulatory Programs Conferring Phenotypic...   \n",
       "27863251  Genetic Drivers of Epigenetic and Transcriptio...   \n",
       "31585088  Integrated Proteogenomic Characterization of H...   \n",
       "32059783  Lymphoma Driver Mutations in the Pathogenic Ev...   \n",
       "27912059  Microbiota Diurnal Rhythmicity Programs Host T...   \n",
       "32470399          Molecular Choreography of Acute Exercise.   \n",
       "32589957  Molecular Transducers of Physical Activity Con...   \n",
       "32579974  Multimodal Analysis of Composition and Spatial...   \n",
       "30193112  Personalized Gut Mucosal Colonization Resistan...   \n",
       "32649874  Proteogenomic Characterization Reveals Therape...   \n",
       "32059776  Proteogenomic Characterization of Endometrial ...   \n",
       "32649875  Proteogenomics of Non-smoking Lung Cancer in E...   \n",
       "29677503  Revolutionizing Precision Oncology through Col...   \n",
       "29328914  Rewiring of the Fruit Metabolome in Tomato Bre...   \n",
       "31178122  Single-Cell Multi-omic Integration Compares an...   \n",
       "\n",
       "                           journal                         doi  \\\n",
       "uid                                                              \n",
       "29479082  Nature reviews. Genetics          10.1038/nrg.2018.4   \n",
       "31925408  Nature reviews. Genetics   10.1038/s41576-020-0211-6   \n",
       "28970588  Nature reviews. Genetics         10.1038/nrg.2017.79   \n",
       "31491384                      Cell  10.1016/j.cell.2019.08.010   \n",
       "29100071                      Cell  10.1016/j.cell.2017.10.027   \n",
       "30595449                      Cell  10.1016/j.cell.2018.11.045   \n",
       "27863251                      Cell  10.1016/j.cell.2016.10.026   \n",
       "31585088                      Cell  10.1016/j.cell.2019.08.052   \n",
       "32059783                      Cell  10.1016/j.cell.2020.01.029   \n",
       "27912059                      Cell  10.1016/j.cell.2016.11.003   \n",
       "32470399                      Cell  10.1016/j.cell.2020.04.043   \n",
       "32589957                      Cell  10.1016/j.cell.2020.06.004   \n",
       "32579974                      Cell  10.1016/j.cell.2020.05.039   \n",
       "30193112                      Cell  10.1016/j.cell.2018.08.041   \n",
       "32649874                      Cell  10.1016/j.cell.2020.06.013   \n",
       "32059776                      Cell  10.1016/j.cell.2020.01.026   \n",
       "32649875                      Cell  10.1016/j.cell.2020.06.012   \n",
       "29677503                      Cell  10.1016/j.cell.2018.04.008   \n",
       "29328914                      Cell  10.1016/j.cell.2017.12.019   \n",
       "31178122                      Cell  10.1016/j.cell.2019.05.006   \n",
       "\n",
       "          journal_sjr_rank  \n",
       "uid                         \n",
       "29479082               7.0  \n",
       "31925408               7.0  \n",
       "28970588               7.0  \n",
       "31491384               8.0  \n",
       "29100071               8.0  \n",
       "30595449               8.0  \n",
       "27863251               8.0  \n",
       "31585088               8.0  \n",
       "32059783               8.0  \n",
       "27912059               8.0  \n",
       "32470399               8.0  \n",
       "32589957               8.0  \n",
       "32579974               8.0  \n",
       "30193112               8.0  \n",
       "32649874               8.0  \n",
       "32059776               8.0  \n",
       "32649875               8.0  \n",
       "29677503               8.0  \n",
       "29328914               8.0  \n",
       "31178122               8.0  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First 20 rows of the complete literature table after passing it through display_sorted.\n",
    "display_sorted(literature).head(20)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Discoveries?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>journal</th>\n",
       "      <th>doi</th>\n",
       "      <th>journal_sjr_rank</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>uid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>32649874</th>\n",
       "      <td>Proteogenomic Characterization Reveals Therape...</td>\n",
       "      <td>Cell</td>\n",
       "      <td>10.1016/j.cell.2020.06.013</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29328921</th>\n",
       "      <td>Tomato Multiomics Reveals Consequences of Crop...</td>\n",
       "      <td>Cell</td>\n",
       "      <td>10.1016/j.cell.2017.12.036</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28285833</th>\n",
       "      <td>Integrative Proteomics and Phosphoproteomics P...</td>\n",
       "      <td>Immunity</td>\n",
       "      <td>10.1016/j.immuni.2017.02.010</td>\n",
       "      <td>50.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29898900</th>\n",
       "      <td>Principled multi-omic analysis reveals gene re...</td>\n",
       "      <td>Genome research</td>\n",
       "      <td>10.1101/gr.227066.117</td>\n",
       "      <td>84.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32182340</th>\n",
       "      <td>Multi-omic analysis of gametogenesis reveals a...</td>\n",
       "      <td>Nucleic acids research</td>\n",
       "      <td>10.1093/nar/gkaa163</td>\n",
       "      <td>90.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31227589</th>\n",
       "      <td>Integrated multiomic analysis reveals comprehe...</td>\n",
       "      <td>Gut</td>\n",
       "      <td>10.1136/gutjnl-2019-318912</td>\n",
       "      <td>121.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30268436</th>\n",
       "      <td>A Pan-Cancer Analysis Reveals High-Frequency G...</td>\n",
       "      <td>Cell systems</td>\n",
       "      <td>10.1016/j.cels.2018.08.010</td>\n",
       "      <td>131.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28544881</th>\n",
       "      <td>Mammalian Systems Biotechnology Reveals Global...</td>\n",
       "      <td>Cell systems</td>\n",
       "      <td>10.1016/j.cels.2017.04.009</td>\n",
       "      <td>131.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26446169</th>\n",
       "      <td>A Cross-Species Analysis in Pancreatic Neuroen...</td>\n",
       "      <td>Cancer discovery</td>\n",
       "      <td>10.1158/2159-8290.CD-15-0068</td>\n",
       "      <td>137.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29588317</th>\n",
       "      <td>Spatiotemporal Multi-Omics Mapping Generates a...</td>\n",
       "      <td>Circulation</td>\n",
       "      <td>10.1161/CIRCULATIONAHA.117.032291</td>\n",
       "      <td>142.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27132591</th>\n",
       "      <td>Integrative Multi-omic Analysis of Human Plate...</td>\n",
       "      <td>American journal of human genetics</td>\n",
       "      <td>10.1016/j.ajhg.2016.03.007</td>\n",
       "      <td>143.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32023468</th>\n",
       "      <td>Inverse Data-Driven Modeling and Multiomics An...</td>\n",
       "      <td>Cell reports</td>\n",
       "      <td>10.1016/j.celrep.2020.01.011</td>\n",
       "      <td>203.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32522993</th>\n",
       "      <td>A multi-omics analysis reveals the unfolded pr...</td>\n",
       "      <td>Nature communications</td>\n",
       "      <td>10.1038/s41467-020-16747-y</td>\n",
       "      <td>238.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30692544</th>\n",
       "      <td>Deconvolution of single-cell multi-omics layer...</td>\n",
       "      <td>Nature communications</td>\n",
       "      <td>10.1038/s41467-018-08205-7</td>\n",
       "      <td>238.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31604924</th>\n",
       "      <td>Integrated molecular characterization of chond...</td>\n",
       "      <td>Nature communications</td>\n",
       "      <td>10.1038/s41467-019-12525-7</td>\n",
       "      <td>238.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29500431</th>\n",
       "      <td>Integrative analysis of omics summary data rev...</td>\n",
       "      <td>Nature communications</td>\n",
       "      <td>10.1038/s41467-018-03371-0</td>\n",
       "      <td>238.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29615613</th>\n",
       "      <td>Multi-omics analysis reveals neoantigen-indepe...</td>\n",
       "      <td>Nature communications</td>\n",
       "      <td>10.1038/s41467-018-03730-x</td>\n",
       "      <td>238.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29713003</th>\n",
       "      <td>Multi-omics profiling of younger Asian breast ...</td>\n",
       "      <td>Nature communications</td>\n",
       "      <td>10.1038/s41467-018-04129-4</td>\n",
       "      <td>238.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32312967</th>\n",
       "      <td>Nitrogen limitation reveals large reserves in ...</td>\n",
       "      <td>Nature communications</td>\n",
       "      <td>10.1038/s41467-020-15749-0</td>\n",
       "      <td>238.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28408661</th>\n",
       "      <td>Multi-Omics of Tomato Glandular Trichomes Reve...</td>\n",
       "      <td>The Plant cell</td>\n",
       "      <td>10.1105/tpc.17.00060</td>\n",
       "      <td>254.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      title  \\\n",
       "uid                                                           \n",
       "32649874  Proteogenomic Characterization Reveals Therape...   \n",
       "29328921  Tomato Multiomics Reveals Consequences of Crop...   \n",
       "28285833  Integrative Proteomics and Phosphoproteomics P...   \n",
       "29898900  Principled multi-omic analysis reveals gene re...   \n",
       "32182340  Multi-omic analysis of gametogenesis reveals a...   \n",
       "31227589  Integrated multiomic analysis reveals comprehe...   \n",
       "30268436  A Pan-Cancer Analysis Reveals High-Frequency G...   \n",
       "28544881  Mammalian Systems Biotechnology Reveals Global...   \n",
       "26446169  A Cross-Species Analysis in Pancreatic Neuroen...   \n",
       "29588317  Spatiotemporal Multi-Omics Mapping Generates a...   \n",
       "27132591  Integrative Multi-omic Analysis of Human Plate...   \n",
       "32023468  Inverse Data-Driven Modeling and Multiomics An...   \n",
       "32522993  A multi-omics analysis reveals the unfolded pr...   \n",
       "30692544  Deconvolution of single-cell multi-omics layer...   \n",
       "31604924  Integrated molecular characterization of chond...   \n",
       "29500431  Integrative analysis of omics summary data rev...   \n",
       "29615613  Multi-omics analysis reveals neoantigen-indepe...   \n",
       "29713003  Multi-omics profiling of younger Asian breast ...   \n",
       "32312967  Nitrogen limitation reveals large reserves in ...   \n",
       "28408661  Multi-Omics of Tomato Glandular Trichomes Reve...   \n",
       "\n",
       "                                     journal  \\\n",
       "uid                                            \n",
       "32649874                                Cell   \n",
       "29328921                                Cell   \n",
       "28285833                            Immunity   \n",
       "29898900                     Genome research   \n",
       "32182340              Nucleic acids research   \n",
       "31227589                                 Gut   \n",
       "30268436                        Cell systems   \n",
       "28544881                        Cell systems   \n",
       "26446169                    Cancer discovery   \n",
       "29588317                         Circulation   \n",
       "27132591  American journal of human genetics   \n",
       "32023468                        Cell reports   \n",
       "32522993               Nature communications   \n",
       "30692544               Nature communications   \n",
       "31604924               Nature communications   \n",
       "29500431               Nature communications   \n",
       "29615613               Nature communications   \n",
       "29713003               Nature communications   \n",
       "32312967               Nature communications   \n",
       "28408661                      The Plant cell   \n",
       "\n",
       "                                        doi  journal_sjr_rank  \n",
       "uid                                                            \n",
       "32649874         10.1016/j.cell.2020.06.013               8.0  \n",
       "29328921         10.1016/j.cell.2017.12.036               8.0  \n",
       "28285833       10.1016/j.immuni.2017.02.010              50.0  \n",
       "29898900              10.1101/gr.227066.117              84.0  \n",
       "32182340                10.1093/nar/gkaa163              90.0  \n",
       "31227589         10.1136/gutjnl-2019-318912             121.0  \n",
       "30268436         10.1016/j.cels.2018.08.010             131.0  \n",
       "28544881         10.1016/j.cels.2017.04.009             131.0  \n",
       "26446169       10.1158/2159-8290.CD-15-0068             137.0  \n",
       "29588317  10.1161/CIRCULATIONAHA.117.032291             142.0  \n",
       "27132591         10.1016/j.ajhg.2016.03.007             143.0  \n",
       "32023468       10.1016/j.celrep.2020.01.011             203.0  \n",
       "32522993         10.1038/s41467-020-16747-y             238.0  \n",
       "30692544         10.1038/s41467-018-08205-7             238.0  \n",
       "31604924         10.1038/s41467-019-12525-7             238.0  \n",
       "29500431         10.1038/s41467-018-03371-0             238.0  \n",
       "29615613         10.1038/s41467-018-03730-x             238.0  \n",
       "29713003         10.1038/s41467-018-04129-4             238.0  \n",
       "32312967         10.1038/s41467-020-15749-0             238.0  \n",
       "28408661               10.1105/tpc.17.00060             254.0  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Titles containing 'reveals' (case-insensitive: titles are lower-cased first); first 20 shown.\n",
    "# na=False treats a missing title as a non-match so the mask stays purely boolean.\n",
    "literature[literature.title.str.lower().str.contains('reveals', na=False)].pipe(display_sorted).head(20)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Affiliations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Affiliations with the most author listings on papers:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Affiliation</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Department of Genetics, Stanford University School of Medicine, Stanford, CA, USA.</th>\n",
       "      <td>58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tohoku Medical Megabank Organization, Tohoku University, Sendai, Japan.</th>\n",
       "      <td>48</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, China.</th>\n",
       "      <td>46</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>The European Molecular Biology Laboratory, The European Bioinformatics Institute, The Wellcome Trust Genome Campus, Hinxton, Cambridgeshire CB10 1SD, UK.</th>\n",
       "      <td>39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, Heilongjiang 150086, China.</th>\n",
       "      <td>32</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                    Affiliation\n",
       "index                                                          \n",
       "Department of Genetics, Stanford University Sch...           58\n",
       "Tohoku Medical Megabank Organization, Tohoku Un...           48\n",
       "College of Bioinformatics Science and Technolog...           46\n",
       "The European Molecular Biology Laboratory, The ...           39\n",
       "College of Bioinformatics Science and Technolog...           32"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "affiliations.Affiliation.sorted_value_counts().head(5).to_frame()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Affiliations appearing on the most papers (each affiliation is counted once per paper):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Affiliation</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>M&amp;M Medical BioInformatics, Hongo 113-0033, Japan.</th>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, China.</th>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Department of Genetics, Stanford University School of Medicine, Stanford, CA, USA.</th>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>University of Chinese Academy of Sciences, Beijing 100049, China.</th>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Institute for Systems Biology, Seattle, WA, USA.</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Luxembourg Centre for Systems Biomedicine, University of Luxembourg, Esch-sur-Alzette, Luxembourg.</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Mosaiques Diagnostics GmbH, Hannover, Germany.</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Biological Sciences Division, Pacific Northwest National Laboratory, Richland, WA, USA.</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Blavatnik School of Computer Science, Tel Aviv University, Tel Aviv, Israel.</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>College of Bioinformatics Science and Technology, Harbin Medical University, Harbin, Heilongjiang 150086, China.</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                    Affiliation\n",
       "index                                                          \n",
       "M&M Medical BioInformatics, Hongo 113-0033, Japan.           16\n",
       "College of Bioinformatics Science and Technolog...            7\n",
       "Department of Genetics, Stanford University Sch...            7\n",
       "University of Chinese Academy of Sciences, Beij...            7\n",
       "Institute for Systems Biology, Seattle, WA, USA.              5\n",
       "Luxembourg Centre for Systems Biomedicine, Univ...            5\n",
       "Mosaiques Diagnostics GmbH, Hannover, Germany.                5\n",
       "Biological Sciences Division, Pacific Northwest...            4\n",
       "Blavatnik School of Computer Science, Tel Aviv ...            4\n",
       "College of Bioinformatics Science and Technolog...            4"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "affiliations[['Affiliation', 'PMID']].drop_duplicates().Affiliation.sorted_value_counts().head(10).to_frame()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We were previously getting false hits because we were matching by affiliations:\n",
    " - \"Multi-Omics Based Creative Drug Research Team, Kyungpook National University, Daegu 41566, Republic of Korea\"\n",
    " - \"Panomics, Inc\"\n",
    " \n",
    "so it is important to check that no affiliation is overrepresented. The affiliation strings would need a cleanup for this check to be more reliable (not a priority)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "###  Authors"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: names do not necessarily correspond to unique persons, as adoption of ORCID is still low:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>JointName</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Masaru Katoh</th>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Richard D Smith</th>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Paul Wilmes</th>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Jens Nielsen</th>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Young-Mo Kim</th>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Thomas O Metz</th>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Xia Li</th>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Bernhard O Palsson</th>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yuriko Katoh</th>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Bing Zhang</th>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>James G Wilson</th>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Li Wang</th>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Carrie D Nicora</th>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Claudio Angione</th>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Jerome I Rotter</th>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                    JointName\n",
       "index                        \n",
       "Masaru Katoh               26\n",
       "Richard D Smith            19\n",
       "Paul Wilmes                18\n",
       "Jens Nielsen               17\n",
       "Young-Mo Kim               17\n",
       "Thomas O Metz              16\n",
       "Xia Li                     16\n",
       "Bernhard O Palsson         14\n",
       "Yuriko Katoh               14\n",
       "Bing Zhang                 13\n",
       "James G Wilson             13\n",
       "Li Wang                    13\n",
       "Carrie D Nicora            12\n",
       "Claudio Angione            12\n",
       "Jerome I Rotter            12"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "authors['JointName'].sorted_value_counts().head(15).to_frame()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Publication kind and type"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "index\n",
       "article            3453\n",
       "article in book       3\n",
       "Name: kind, dtype: int64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "literature.kind.sorted_value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "744"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(literature['Is Review'] == True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Journal Article</th>\n",
       "      <td>3370</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Research Support, Non-U.S. Gov't</th>\n",
       "      <td>1371</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Review</th>\n",
       "      <td>744</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Research Support, N.I.H., Extramural</th>\n",
       "      <td>460</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Research Support, U.S. Gov't, Non-P.H.S.</th>\n",
       "      <td>161</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Comparative Study</th>\n",
       "      <td>61</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Editorial</th>\n",
       "      <td>44</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Comment</th>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Clinical Trial</th>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Published Erratum</th>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Multicenter Study</th>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Research Support, N.I.H., Intramural</th>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Evaluation Study</th>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Letter</th>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Case Reports</th>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dataset</th>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Introductory Journal Article</th>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Observational Study</th>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Twin Study</th>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Validation Study</th>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>English Abstract</th>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Randomized Controlled Trial</th>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Systematic Review</th>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Video-Audio Media</th>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Meta-Analysis</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Research Support, U.S. Gov't, P.H.S.</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Congress</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Interview</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>News</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Clinical Trial, Phase II</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Consensus Development Conference, NIH</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Controlled Clinical Trial</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Historical Article</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Practice Guideline</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Preprint</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                          count\n",
       "index                                          \n",
       "Journal Article                            3370\n",
       "Research Support, Non-U.S. Gov't           1371\n",
       "Review                                      744\n",
       "Research Support, N.I.H., Extramural        460\n",
       "Research Support, U.S. Gov't, Non-P.H.S.    161\n",
       "Comparative Study                            61\n",
       "Editorial                                    44\n",
       "Comment                                      37\n",
       "Clinical Trial                               26\n",
       "Published Erratum                            23\n",
       "Multicenter Study                            21\n",
       "Research Support, N.I.H., Intramural         16\n",
       "Evaluation Study                             13\n",
       "Letter                                       13\n",
       "Case Reports                                  9\n",
       "Dataset                                       9\n",
       "Introductory Journal Article                  7\n",
       "Observational Study                           7\n",
       "Twin Study                                    7\n",
       "Validation Study                              7\n",
       "English Abstract                              6\n",
       "Randomized Controlled Trial                   6\n",
       "Systematic Review                             6\n",
       "Video-Audio Media                             6\n",
       "Meta-Analysis                                 5\n",
       "Research Support, U.S. Gov't, P.H.S.          5\n",
       "Congress                                      4\n",
       "Interview                                     3\n",
       "News                                          2\n",
       "Clinical Trial, Phase II                      1\n",
       "Consensus Development Conference, NIH         1\n",
       "Controlled Clinical Trial                     1\n",
       "Historical Article                            1\n",
       "Practice Guideline                            1\n",
       "Preprint                                      1"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "publication_types['0'].sorted_value_counts().to_frame('count')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>doi</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>uid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>21889780</th>\n",
       "      <td>[OMICS and biomarkers of glial tumors].</td>\n",
       "      <td>10.1016/j.neurol.2011.07.007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22490743</th>\n",
       "      <td>[Application of an integrated omics analysis f...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24292186</th>\n",
       "      <td>[Biomarker exploration and its clinical use].</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25744640</th>\n",
       "      <td>[Gut microbiota, host defense and immunity: an...</td>\n",
       "      <td>10.2177/jsci.37.403</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26351165</th>\n",
       "      <td>[Identification of disease targets for precisi...</td>\n",
       "      <td>10.16288/j.yczz.15-061</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32694108</th>\n",
       "      <td>[Comprehensive re-annotation of protein-coding...</td>\n",
       "      <td>10.16288/j.yczz.20-022</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      title  \\\n",
       "uid                                                           \n",
       "21889780            [OMICS and biomarkers of glial tumors].   \n",
       "22490743  [Application of an integrated omics analysis f...   \n",
       "24292186      [Biomarker exploration and its clinical use].   \n",
       "25744640  [Gut microbiota, host defense and immunity: an...   \n",
       "26351165  [Identification of disease targets for precisi...   \n",
       "32694108  [Comprehensive re-annotation of protein-coding...   \n",
       "\n",
       "                                   doi  \n",
       "uid                                     \n",
       "21889780  10.1016/j.neurol.2011.07.007  \n",
       "22490743                           NaN  \n",
       "24292186                           NaN  \n",
       "25744640           10.2177/jsci.37.403  \n",
       "26351165        10.16288/j.yczz.15-061  \n",
       "32694108        10.16288/j.yczz.20-022  "
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "literature[literature['Is English Abstract'] == True][['title', 'doi']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>doi</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>uid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>22424393</th>\n",
       "      <td>Q &amp; A: the Snyderome.</td>\n",
       "      <td>10.1186/gb-2012-13-3-147</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31429661</th>\n",
       "      <td>Interview with Prof. K. Yalçın Arga: A Pioneer...</td>\n",
       "      <td>10.1089/omi.2019.0131</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31755849</th>\n",
       "      <td>Interview with Prof. Collet Dandara: A Pioneer...</td>\n",
       "      <td>10.1089/omi.2019.0174</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      title  \\\n",
       "uid                                                           \n",
       "22424393                              Q & A: the Snyderome.   \n",
       "31429661  Interview with Prof. K. Yalçın Arga: A Pioneer...   \n",
       "31755849  Interview with Prof. Collet Dandara: A Pioneer...   \n",
       "\n",
       "                               doi  \n",
       "uid                                 \n",
       "22424393  10.1186/gb-2012-13-3-147  \n",
       "31429661     10.1089/omi.2019.0131  \n",
       "31755849     10.1089/omi.2019.0174  "
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "literature[literature['Is Interview'] == True][['title', 'doi']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>doi</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>uid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>12186644</th>\n",
       "      <td>Integromics: challenges in data integration.</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27164162</th>\n",
       "      <td>Emergence of Biomolecular Pathways to Define N...</td>\n",
       "      <td>10.1165/rcmb.2016-0141PS</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29339647</th>\n",
       "      <td>Proceedings of the 11th Congress of the Intern...</td>\n",
       "      <td>10.1159/000485799</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31394729</th>\n",
       "      <td>Novel Strategies for Cancer Treatment: Highlig...</td>\n",
       "      <td>10.3390/cancers11081125</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      title  \\\n",
       "uid                                                           \n",
       "12186644       Integromics: challenges in data integration.   \n",
       "27164162  Emergence of Biomolecular Pathways to Define N...   \n",
       "29339647  Proceedings of the 11th Congress of the Intern...   \n",
       "31394729  Novel Strategies for Cancer Treatment: Highlig...   \n",
       "\n",
       "                               doi  \n",
       "uid                                 \n",
       "12186644                       NaN  \n",
       "27164162  10.1165/rcmb.2016-0141PS  \n",
       "29339647         10.1159/000485799  \n",
       "31394729   10.3390/cancers11081125  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "literature[literature['Is Congress'] == True][['title', 'doi']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>doi</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>uid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>29301609</th>\n",
       "      <td>Integration of metabolomics and transcriptomic...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32475383</th>\n",
       "      <td>From genome sequencing to the discovery of pot...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      title  doi\n",
       "uid                                                             \n",
       "29301609  Integration of metabolomics and transcriptomic...  NaN\n",
       "32475383  From genome sequencing to the discovery of pot...  NaN"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "literature[literature['Is News'] == True][['title', 'doi']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>doi</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>uid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>25632108</th>\n",
       "      <td>CyanOmics: an integrated database of omics for...</td>\n",
       "      <td>10.1093/database/bau127</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26130662</th>\n",
       "      <td>toxoMine: an integrated omics data warehouse f...</td>\n",
       "      <td>10.1093/database/bav066</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26646939</th>\n",
       "      <td>Multi-omic profiles of human non-alcoholic fat...</td>\n",
       "      <td>10.1038/sdata.2015.68</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27504011</th>\n",
       "      <td>MODEM: multi-omics data envelopment and mining...</td>\n",
       "      <td>10.1093/database/baw117</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29087370</th>\n",
       "      <td>Monitoring microbial responses to ocean deoxyg...</td>\n",
       "      <td>10.1038/sdata.2017.158</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30084846</th>\n",
       "      <td>A multi-omic atlas of the human frontal cortex...</td>\n",
       "      <td>10.1038/sdata.2018.142</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30204156</th>\n",
       "      <td>The Mount Sinai cohort of large-scale genomic,...</td>\n",
       "      <td>10.1038/sdata.2018.185</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30621600</th>\n",
       "      <td>The 1000IBD project: multi-omics data of 1000 ...</td>\n",
       "      <td>10.1186/s12876-018-0917-5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31201317</th>\n",
       "      <td>Multi omics analysis of fibrotic kidneys in tw...</td>\n",
       "      <td>10.1038/s41597-019-0095-5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      title  \\\n",
       "uid                                                           \n",
       "25632108  CyanOmics: an integrated database of omics for...   \n",
       "26130662  toxoMine: an integrated omics data warehouse f...   \n",
       "26646939  Multi-omic profiles of human non-alcoholic fat...   \n",
       "27504011  MODEM: multi-omics data envelopment and mining...   \n",
       "29087370  Monitoring microbial responses to ocean deoxyg...   \n",
       "30084846  A multi-omic atlas of the human frontal cortex...   \n",
       "30204156  The Mount Sinai cohort of large-scale genomic,...   \n",
       "30621600  The 1000IBD project: multi-omics data of 1000 ...   \n",
       "31201317  Multi omics analysis of fibrotic kidneys in tw...   \n",
       "\n",
       "                                doi  \n",
       "uid                                  \n",
       "25632108    10.1093/database/bau127  \n",
       "26130662    10.1093/database/bav066  \n",
       "26646939      10.1038/sdata.2015.68  \n",
       "27504011    10.1093/database/baw117  \n",
       "29087370     10.1038/sdata.2017.158  \n",
       "30084846     10.1038/sdata.2018.142  \n",
       "30204156     10.1038/sdata.2018.185  \n",
       "30621600  10.1186/s12876-018-0917-5  \n",
       "31201317  10.1038/s41597-019-0095-5  "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "literature[literature['Is Dataset'] == True][['title', 'doi']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Journals"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "journal_freq = literature.journal.sorted_value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>journal</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Scientific reports</th>\n",
       "      <td>126</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Omics : a journal of integrative biology</th>\n",
       "      <td>78</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PloS one</th>\n",
       "      <td>69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Bioinformatics (Oxford, England)</th>\n",
       "      <td>68</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Nature communications</th>\n",
       "      <td>58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Frontiers in genetics</th>\n",
       "      <td>55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Journal of proteomics</th>\n",
       "      <td>53</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>BMC bioinformatics</th>\n",
       "      <td>52</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Nucleic acids research</th>\n",
       "      <td>45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Methods in molecular biology (Clifton, N.J.)</th>\n",
       "      <td>43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Journal of proteome research</th>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>BMC genomics</th>\n",
       "      <td>41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Oncotarget</th>\n",
       "      <td>39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>International journal of molecular sciences</th>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Briefings in bioinformatics</th>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Frontiers in microbiology</th>\n",
       "      <td>34</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Molecular &amp; cellular proteomics : MCP</th>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mSystems</th>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cell reports</th>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Metabolites</th>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                              journal\n",
       "index                                                \n",
       "Scientific reports                                126\n",
       "Omics : a journal of integrative biology           78\n",
       "PloS one                                           69\n",
       "Bioinformatics (Oxford, England)                   68\n",
       "Nature communications                              58\n",
       "Frontiers in genetics                              55\n",
       "Journal of proteomics                              53\n",
       "BMC bioinformatics                                 52\n",
       "Nucleic acids research                             45\n",
       "Methods in molecular biology (Clifton, N.J.)       43\n",
       "Journal of proteome research                       42\n",
       "BMC genomics                                       41\n",
       "Oncotarget                                         39\n",
       "International journal of molecular sciences        37\n",
       "Briefings in bioinformatics                        36\n",
       "Frontiers in microbiology                          34\n",
       "Molecular & cellular proteomics : MCP              29\n",
       "mSystems                                           28\n",
       "Cell reports                                       26\n",
       "Metabolites                                        26"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "journal_freq.head(20).to_frame()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Sanity check (is any of the top names not unique?) - the numbers should be same if counting by ISSN:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "index\n",
       "2045-2322    126\n",
       "1557-8100     77\n",
       "1932-6203     69\n",
       "1367-4811     68\n",
       "2041-1723     58\n",
       "1664-8021     55\n",
       "1876-7737     53\n",
       "1471-2105     52\n",
       "1362-4962     45\n",
       "1940-6029     42\n",
       "Name: journal_issn, dtype: int64"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "literature.journal_issn.sorted_value_counts().head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "index\n",
       "Nucleic acids research                                                     45\n",
       "Methods in molecular biology (Clifton, N.J.)                               43\n",
       "Journal of proteome research                                               42\n",
       "BMC genomics                                                               41\n",
       "Oncotarget                                                                 39\n",
       "                                                                           ..\n",
       "Zhongguo yi xue ke xue yuan xue bao. Acta Academiae Medicinae Sinicae       1\n",
       "Zhonghua nan ke xue = National journal of andrology                         1\n",
       "Zhonghua yu fang yi xue za zhi [Chinese journal of preventive medicine]     1\n",
       "Zoology (Jena, Germany)                                                     1\n",
       "mSphere                                                                     1\n",
       "Name: journal, Length: 967, dtype: int64"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "journal_freq[journal_freq < 50]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "literature = literature.replace({float('nan'): None}).infer_objects()\n",
    "%R -i literature"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Publication types"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%R\n",
    "library(ComplexUpset)\n",
    "source('helpers/plots.R')\n",
    "source('helpers/colors.R')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "publication_types_list = ['Is ' + t for t in publication_types['0'].sorted_value_counts().where(lambda x: x > 10).dropna().index]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "R[write to console]: Scale for 'y' is already present. Adding another scale for 'y', which will\n",
      "replace the existing scale.\n",
      "\n"
     ]
    },
    {
     "data": {
      "image/png": "\n"
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%%R -i publication_types_list -w 2000 -r 100 -h 800\n",
    "\n",
    "upset(\n",
    "    literature,\n",
    "    publication_types_list,\n",
    "    base_annotations=list(\n",
    "        'Intersection size'=intersection_size(\n",
    "            text=list(angle=90, vjust=0.5, hjust=0)\n",
    "        )\n",
    "    ),\n",
    "    width_ratio=0.1,\n",
    "    set_sizes=(\n",
    "        upset_set_size(\n",
    "            geom=geom_bar(width=0.5)\n",
    "        )\n",
    "        + scale_y_continuous(trans=reverse_log_trans())\n",
    "        + theme(axis.text.x=element_text(angle=90))\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "\n"
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%%R -w 700 -h 400 -r 100\n",
    "(\n",
    "    ggplot(literature, aes(x=year, fill=has_doi))\n",
    "    + geom_bar()\n",
    "    + theme_bw()\n",
    ") + (\n",
    "    ggplot(literature, aes(x=year, fill=has_pmc))\n",
    "    + geom_bar()\n",
    "    + theme_bw()\n",
    ") & plot_layout(ncol=1)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}