[1bd6b5]: / notebooks / Source_code.ipynb

Download this file

1814 lines (1813 with data), 89.6 kB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%run notebook_setup.ipynb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "Imported `literature` (904B0F94) at Saturday, 25. Jul 2020 15:56"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {
      "text/markdown": {
       "action": "import",
       "command": "from pubmed_derived_data import literature",
       "finished": "2020-07-25T15:56:17.417838",
       "finished_human_readable": "Saturday, 25. Jul 2020 15:56",
       "result": [
        {
         "new_file": {
          "crc32": "904B0F94",
          "sha256": "A2EFC068A287A3B724AE4B320EE5356E1E99474BD08A2E2A3EBA34CD0194F23B"
         },
         "subject": "literature"
        }
       ],
       "started": "2020-07-25T15:56:15.499885"
      }
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "%vault from pubmed_derived_data import literature"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "code_repositories = literature.index.to_frame().drop(columns='uid').copy()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Code archives and repositories extraction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from re import escape\n",
    "from pandas import Series\n",
    "\n",
    "from repository_detection import (\n",
    "    source_code_platforms, mixed_publication_platforms, data_only_platforms,\n",
    "    all_platforms as platforms\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'github': '(github\\\\.com/\\\\S+/\\\\S+)',\n",
       " 'gitlab': '(gitlab\\\\.com/\\\\S+/\\\\S+)',\n",
       " 'sourceforge': '(sourceforge\\\\.net/\\\\S+)',\n",
       " 'bitbucket': '(bitbucket\\\\.org/\\\\S+)',\n",
       " '.git': '(\\\\S+:\\\\S+\\\\.git\\\\S*)',\n",
       " 'cran': 'cran\\\\.r-project\\\\.org/(?:web/packages/|package=)(\\\\S+)(?:/\\\\b|/$|\\\\s|\\\\.$|\\\\)|$)',\n",
       " 'pypi': 'pypi\\\\.python\\\\.org/pypi/(\\\\S+)(?:/\\\\b|/$|\\\\s|\\\\.$|\\\\)|$)'}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "source_code_platforms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'zenodo': 'doi\\\\.org/10.5281/(zenodo\\\\.\\\\d+?)(?:/\\\\b|/$|\\\\s|\\\\.$|\\\\)|$)',\n",
       " 'bioconductor': 'bioconductor.org/packages/(\\\\S+)(?:/\\\\b|/$|\\\\s|\\\\.$|\\\\)|$)',\n",
       " 'osf': 'osf.io/(\\\\S+){slash_or_end}'}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mixed_publication_platforms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'dryad': 'datadryad.org/(\\\\S+)(?:/\\\\b|/$|\\\\s|\\\\.$|\\\\)|$)'}"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_only_platforms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def collapse_lists(lists):\n",
    "    return sorted(set(sum(lists, [])))\n",
    "\n",
    "\n",
    "fields = Series(['abstract', 'full_text'])\n",
    "\n",
    "for platform in platforms:\n",
    "    for field in fields:\n",
    "        code_repositories[f'{field}_mentions_{platform}'] = literature[field].str.lower().str.contains(escape(platform)) == True\n",
    "        matches = literature[field].astype(str).str.findall(platforms[platform])\n",
    "        code_repositories[f'{field}_{platform}_matches'] = matches\n",
    "    code_repositories[f'mentions_{platform}'] = code_repositories[fields + f'_mentions_{platform}'].any(axis=1)\n",
    "    code_repositories[f'{platform}_matches'] = code_repositories[fields + f'_{platform}_matches'].apply(collapse_lists, axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Ext-link handling is not needed for abstracts:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "assert not literature.abstract.str.contains('ext-link').any()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "abstract_mentions_github          54\n",
       "abstract_mentions_gitlab           2\n",
       "abstract_mentions_sourceforge      4\n",
       "abstract_mentions_bitbucket        3\n",
       "abstract_mentions_.git             2\n",
       "abstract_mentions_cran            19\n",
       "abstract_mentions_pypi             2\n",
       "abstract_mentions_zenodo           1\n",
       "abstract_mentions_bioconductor    16\n",
       "abstract_mentions_osf              1\n",
       "abstract_mentions_dryad            0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "code_repositories[[f'abstract_mentions_{platform}' for platform in platforms]].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "full_text_mentions_github          205\n",
       "full_text_mentions_gitlab            5\n",
       "full_text_mentions_sourceforge      44\n",
       "full_text_mentions_bitbucket        15\n",
       "full_text_mentions_.git             54\n",
       "full_text_mentions_cran             68\n",
       "full_text_mentions_pypi              5\n",
       "full_text_mentions_zenodo           13\n",
       "full_text_mentions_bioconductor    106\n",
       "full_text_mentions_osf               4\n",
       "full_text_mentions_dryad             1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "code_repositories[[f'full_text_mentions_{platform}' for platform in platforms]].sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The sourceforge mentions might be uses of tools."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "mentions_github          245\n",
       "mentions_gitlab            5\n",
       "mentions_sourceforge      46\n",
       "mentions_bitbucket        17\n",
       "mentions_.git             56\n",
       "mentions_cran             82\n",
       "mentions_pypi              7\n",
       "mentions_zenodo           14\n",
       "mentions_bioconductor    116\n",
       "mentions_osf               5\n",
       "mentions_dryad             1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "code_repositories[[f'mentions_{platform}' for platform in platforms]].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "github_matches          [github.com/Magdoll//ECE, github.com/wizardfan...\n",
       "gitlab_matches          [gitlab.com/Gustafsson-lab/lassim, gitlab.com/...\n",
       "sourceforge_matches     [sourceforge.net/., sourceforge.net/projects/a...\n",
       "bitbucket_matches       [bitbucket.org/hbc/galaxy-central-hbc, bitbuck...\n",
       ".git_matches            [http://networkx.github.io/, http://broadinsti...\n",
       "cran_matches            [gplots/index.html, iSubpathwayMiner/, mixOmic...\n",
       "pypi_matches                             [omics_pipe), MACS2, multiview.]\n",
       "zenodo_matches          [zenodo.35611, zenodo.546110, zenodo.1154124, ...\n",
       "bioconductor_matches    [release/data/annotation/html/hgu133plus2.db.h...\n",
       "osf_matches                                                            []\n",
       "dryad_matches                                                          []\n",
       "dtype: object"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "code_repositories[[f'{platform}_matches' for platform in platforms]].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "index\n",
       "http://broadinstitute.github.io/picard/                                                                                              11\n",
       "http://broadinstitute.github.io/picard                                                                                                4\n",
       "http://cole-trapnell-lab.github.io/cufflinks/                                                                                         3\n",
       "https://broadinstitute.github.io/picard/                                                                                              3\n",
       "http://zwdzwd.github.io/InfiniumAnnotation                                                                                            2\n",
       "https://trinotate.github.io                                                                                                           2\n",
       "http://cole-trapnell-lab.github.io/cufflinks/cuffmerge/                                                                               1\n",
       "http://cole-trapnell-lab.github.io/cufflinks/releases/v2.2.1/                                                                         1\n",
       "http://cole-trapnell-lab.github.io/monocle-release/articles/v2.0.0                                                                    1\n",
       "http://galaxyproject.github.io/training-material                                                                                      1\n",
       "http://kwanjeeraw.github.io/grinn/                                                                                                    1\n",
       "http://linsalrob.github.io/PyFBA/                                                                                                     1\n",
       "http://mzmine.github.io/                                                                                                              1\n",
       "http://networkx.github.io/                                                                                                            1\n",
       "http://picrust.github.com/picrust/                                                                                                    1\n",
       "http://sbgn.github.io/sbgn/                                                                                                           1\n",
       "http://topepo.github.io/caret/index.html                                                                                              1\n",
       "http://varadanlab.github.io/InFlo/                                                                                                    1\n",
       "http://www.github.com/baolinwu/MSKAT.                                                                                                 1\n",
       "http://yjjang.github.io/mongkie                                                                                                       1\n",
       "https://broadinstitute.github.io/picard                                                                                               1\n",
       "https://chun-weitung.gitbooks.io/chemdis/content/data-sources.html                                                                    1\n",
       "https://chun-weitung.gitbooks.io/chemdis/content/web-api.html                                                                         1\n",
       "https://funricegenes.github.io/                                                                                                       1\n",
       "https://galaxyproject.github.io/training-material/topics/proteomics/tutorials/metaproteomics/tutorial.html                            1\n",
       "https://galaxyproject.github.io/training-material/topics/proteomics/tutorials/proteogenomics-novel-peptide-analysis/tutorial.html     1\n",
       "https://ginolhac.github.io/mapDamage/                                                                                                 1\n",
       "https://gist.github.com/yannabraham/c1f9de9b23fb94105ca5                                                                              1\n",
       "https://github.com/TiphaineCMartin/multiomic_AITD.git                                                                                 1\n",
       "https://github.com/biopython/biopython.github.io/                                                                                     1\n",
       "https://github.com/galaxyproteomics/mvpapplication-git.git                                                                            1\n",
       "https://github.com/ucdavis-bioinformatics/sickle.git                                                                                  1\n",
       "https://github.com/xu1912/SMON.git                                                                                                    1\n",
       "https://jcupitt.github.io/libvips/                                                                                                    1\n",
       "https://jheatmap.github.io/jheatmap/                                                                                                  1\n",
       "https://oliviaab.github.io/sismonr/.                                                                                                  1\n",
       "https://opencobra.github.io/                                                                                                          1\n",
       "https://openseadragon.github.io/                                                                                                      1\n",
       "https://rrshieldscutler.github.io/splinectomeR/                                                                                       1\n",
       "https://rstudio.github.io/DT/                                                                                                         1\n",
       "https://samtools.github.io/hts-specs/SAMv1.pdf                                                                                        1\n",
       "https://transdecoder.github.io                                                                                                        1\n",
       "https://transdecoder.github.io/                                                                                                       1\n",
       "https://xjtu-funet-source.github.io/FuNet/FuNet.html                                                                                  1\n",
       "Name: 0, dtype: int64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Series(code_repositories['.git_matches'].sum()).sorted_value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Limitations\n",
    "\n",
    " - does not account for GitLab instances in custom domains"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Screen for code availability statements"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can compose a list of common code availability phrases and use that to see what are the other ways of sharing code."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "index\n",
       "Scientific reports                              126\n",
       "Omics : a journal of integrative biology         78\n",
       "PloS one                                         69\n",
       "Bioinformatics (Oxford, England)                 68\n",
       "Nature communications                            58\n",
       "Frontiers in genetics                            55\n",
       "Journal of proteomics                            53\n",
       "BMC bioinformatics                               52\n",
       "Nucleic acids research                           45\n",
       "Methods in molecular biology (Clifton, N.J.)     43\n",
       "Name: journal, dtype: int64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "journals_with_most_papers = literature['journal'].sorted_value_counts().head(20)\n",
    "journals_with_most_papers.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "literature = literature.drop(columns=list(set(literature.columns) & set(code_repositories.columns)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "72"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "likely_to_contain_code = Series(list(source_code_platforms) + list(mixed_publication_platforms))\n",
    "any_source_link = code_repositories['abstract_' + likely_to_contain_code + '_matches'].any(axis=1)\n",
    "any_source_link.sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: this does not have to their source code - can be of a tool they used!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "literature['has_source_code_link'] = any_source_link"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>journal</th>\n",
       "      <th>papers_with_code_link</th>\n",
       "      <th>common_n_grams</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Scientific reports</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Omics : a journal of integrative biology</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>PloS one</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "      <td>42</td>\n",
       "      <td>[availability and implementation, https github com, supplementary information supplementary data are available at bioinformatics online]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Nature communications</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Frontiers in genetics</td>\n",
       "      <td>5</td>\n",
       "      <td>[available at https github com, co expression modules, deep learning based, dysfunctional subpathways we, is available at https, multi omics data, number of subtypes]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Journal of proteomics</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>BMC bioinformatics</td>\n",
       "      <td>6</td>\n",
       "      <td>[available at https, chronic lymphocytic leukaemia, is freely available]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Nucleic acids research</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Methods in molecular biology (Clifton, N.J.)</td>\n",
       "      <td>1</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Journal of proteome research</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>BMC genomics</td>\n",
       "      <td>2</td>\n",
       "      <td>[candidate synthetic lethal, loss of function, synthetic lethal gene pairs]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Oncotarget</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>International journal of molecular sciences</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Briefings in bioinformatics</td>\n",
       "      <td>3</td>\n",
       "      <td>[canonical correlation analysis]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>Frontiers in microbiology</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Molecular &amp; cellular proteomics : MCP</td>\n",
       "      <td>1</td>\n",
       "      <td>[from multiple experiments, of gene sets]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>mSystems</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>Cell reports</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>Metabolites</td>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                         journal  papers_with_code_link  \\\n",
       "0                             Scientific reports                      0   \n",
       "1       Omics : a journal of integrative biology                      0   \n",
       "2                                       PloS one                      0   \n",
       "3               Bioinformatics (Oxford, England)                     42   \n",
       "4                          Nature communications                      0   \n",
       "5                          Frontiers in genetics                      5   \n",
       "6                          Journal of proteomics                      0   \n",
       "7                             BMC bioinformatics                      6   \n",
       "8                         Nucleic acids research                      0   \n",
       "9   Methods in molecular biology (Clifton, N.J.)                      1   \n",
       "10                  Journal of proteome research                      0   \n",
       "11                                  BMC genomics                      2   \n",
       "12                                    Oncotarget                      0   \n",
       "13   International journal of molecular sciences                      0   \n",
       "14                   Briefings in bioinformatics                      3   \n",
       "15                     Frontiers in microbiology                      0   \n",
       "16         Molecular & cellular proteomics : MCP                      1   \n",
       "17                                      mSystems                      0   \n",
       "18                                  Cell reports                      0   \n",
       "19                                   Metabolites                      0   \n",
       "\n",
       "                                                                                                                                                            common_n_grams  \n",
       "0                                                                                                                                                                       []  \n",
       "1                                                                                                                                                                       []  \n",
       "2                                                                                                                                                                       []  \n",
       "3                                 [availability and implementation, https github com, supplementary information supplementary data are available at bioinformatics online]  \n",
       "4                                                                                                                                                                       []  \n",
       "5   [available at https github com, co expression modules, deep learning based, dysfunctional subpathways we, is available at https, multi omics data, number of subtypes]  \n",
       "6                                                                                                                                                                       []  \n",
       "7                                                                                                 [available at https, chronic lymphocytic leukaemia, is freely available]  \n",
       "8                                                                                                                                                                       []  \n",
       "9                                                                                                                                                                       []  \n",
       "10                                                                                                                                                                      []  \n",
       "11                                                                                             [candidate synthetic lethal, loss of function, synthetic lethal gene pairs]  \n",
       "12                                                                                                                                                                      []  \n",
       "13                                                                                                                                                                      []  \n",
       "14                                                                                                                                        [canonical correlation analysis]  \n",
       "15                                                                                                                                                                      []  \n",
       "16                                                                                                                               [from multiple experiments, of gene sets]  \n",
       "17                                                                                                                                                                      []  \n",
       "18                                                                                                                                                                      []  \n",
       "19                                                                                                                                                                      []  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas\n",
    "pandas.set_option('display.max_colwidth', 500)\n",
    "from pandas import DataFrame\n",
    "from helpers.n_grams import find_longest_common_n_grams\n",
    "\n",
    "\n",
    "statments = []\n",
    "\n",
    "for journal in journals_with_most_papers.index:\n",
    "    relevant_papers = literature[(literature.journal == journal) & literature.has_source_code_link]\n",
    "    relevant_papers_with_abstracts = relevant_papers[~relevant_papers.abstract.isnull()]\n",
    "\n",
    "    statments.append({\n",
    "        'journal': journal,\n",
    "        'papers_with_code_link': len(relevant_papers),\n",
    "        'common_n_grams': find_longest_common_n_grams(\n",
    "            data=relevant_papers_with_abstracts.abstract,\n",
    "            min_words=3, max_words=10,\n",
    "            min_count=3, min_frequency=0.5\n",
    "        )\n",
    "    })\n",
    "\n",
    "DataFrame(statments)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Only _Bioinformatics (Oxford, England)_, _Frontiers in genetics_ and _BMC bioinformatics_ had n-grams which looked like code availability statements."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "compact_code_links_summary = (\n",
    "    code_repositories['abstract_' + Series(list(platforms)) + '_matches']\n",
    "    .rename(columns=lambda c: c.replace('abstract_', '').replace('_matches', ''))\n",
    "    .apply(lambda x: x[x.apply(len) != 0].to_dict(), axis=1)\n",
    "    .to_frame('detected_code_links')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_statments(expression):\n",
    "    return (\n",
    "        literature['abstract'].str.lower()\n",
    "        .str.extract(expression).dropna()\n",
    "        .rename(columns={0: 'match'})\n",
    "        .join(compact_code_links_summary)\n",
    "        .join(literature)[['match', 'detected_code_links', 'journal']]\n",
    "        .rename(columns=lambda x: x.replace('abstract_', ''))\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandas.set_option('display.max_colwidth', 100)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### BMC bioinformatics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>match</th>\n",
       "      <th>detected_code_links</th>\n",
       "      <th>journal</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>uid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>17468122</th>\n",
       "      <td>at http://integromics.kobic.re.kr/gazer/.</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19706745</th>\n",
       "      <td>from http://cran.r-project.org/ or from the web site companion (http://math.univ-toulouse.fr/bio...</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26568631</th>\n",
       "      <td>at http://biosignal.med.upatras.gr/chronos/</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27814671</th>\n",
       "      <td>as a bioconductor r package.</td>\n",
       "      <td>{}</td>\n",
       "      <td>BMC genomics</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28961954</th>\n",
       "      <td>on request. 50 gb of space is allocated for data storage, with unrestricted number of samples an...</td>\n",
       "      <td>{'github': ['github.com/RonanDaly/pimp']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29931190</th>\n",
       "      <td>at https://github.com/mkanai/grimon as an r package with example omics data sets.</td>\n",
       "      <td>{'github': ['github.com/mkanai/grimon']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30202885</th>\n",
       "      <td>at https://github.com/xuesidong/tobmi.</td>\n",
       "      <td>{'github': ['github.com/XuesiDong/TOBMI.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30598101</th>\n",
       "      <td>at http://bioinfo.au.tsinghua.edu.cn/jianglab/csnets/ .</td>\n",
       "      <td>{}</td>\n",
       "      <td>BMC genomics</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30863842</th>\n",
       "      <td>for downloading from https://github.com/pfruan/absnf.</td>\n",
       "      <td>{'github': ['github.com/pfruan/abSNF.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30957844</th>\n",
       "      <td>on the web at https://cran.r-project.org/web/packages/smccnet/index.html.</td>\n",
       "      <td>{'cran': ['SmCCNet/index.html.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                                                        match  \\\n",
       "uid                                                                                                             \n",
       "17468122                                                            at http://integromics.kobic.re.kr/gazer/.   \n",
       "19706745  from http://cran.r-project.org/ or from the web site companion (http://math.univ-toulouse.fr/bio...   \n",
       "26568631                                                          at http://biosignal.med.upatras.gr/chronos/   \n",
       "27814671                                                                         as a bioconductor r package.   \n",
       "28961954  on request. 50 gb of space is allocated for data storage, with unrestricted number of samples an...   \n",
       "29931190                    at https://github.com/mkanai/grimon as an r package with example omics data sets.   \n",
       "30202885                                                               at https://github.com/xuesidong/tobmi.   \n",
       "30598101                                              at http://bioinfo.au.tsinghua.edu.cn/jianglab/csnets/ .   \n",
       "30863842                                                for downloading from https://github.com/pfruan/absnf.   \n",
       "30957844                            on the web at https://cran.r-project.org/web/packages/smccnet/index.html.   \n",
       "\n",
       "                                  detected_code_links  \\\n",
       "uid                                                     \n",
       "17468122                                           {}   \n",
       "19706745                                           {}   \n",
       "26568631                                           {}   \n",
       "27814671                                           {}   \n",
       "28961954    {'github': ['github.com/RonanDaly/pimp']}   \n",
       "29931190     {'github': ['github.com/mkanai/grimon']}   \n",
       "30202885  {'github': ['github.com/XuesiDong/TOBMI.']}   \n",
       "30598101                                           {}   \n",
       "30863842     {'github': ['github.com/pfruan/abSNF.']}   \n",
       "30957844            {'cran': ['SmCCNet/index.html.']}   \n",
       "\n",
       "                                   journal  \n",
       "uid                                         \n",
       "17468122  Bioinformatics (Oxford, England)  \n",
       "19706745  Bioinformatics (Oxford, England)  \n",
       "26568631  Bioinformatics (Oxford, England)  \n",
       "27814671                      BMC genomics  \n",
       "28961954  Bioinformatics (Oxford, England)  \n",
       "29931190  Bioinformatics (Oxford, England)  \n",
       "30202885  Bioinformatics (Oxford, England)  \n",
       "30598101                      BMC genomics  \n",
       "30863842  Bioinformatics (Oxford, England)  \n",
       "30957844  Bioinformatics (Oxford, England)  "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Capture the text between 'is freely available ' and the next line break.\n",
    "get_statments(expression='is freely available (.*?)\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>match</th>\n",
       "      <th>detected_code_links</th>\n",
       "      <th>journal</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>uid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>26377073</th>\n",
       "      <td>https://github.com/yangzi4/inmf.</td>\n",
       "      <td>{'github': ['github.com/yangzi4/iNMF.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26961692</th>\n",
       "      <td>https://github.com/maxconway/snftool</td>\n",
       "      <td>{'github': ['github.com/maxconway/SNFtool']}</td>\n",
       "      <td>BMC bioinformatics</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28082455</th>\n",
       "      <td>https://github.com/olganikolova/gbgfa.</td>\n",
       "      <td>{'github': ['github.com/olganikolova/gbgfa.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28640810</th>\n",
       "      <td>https://gitlab.com/gustafsson-lab/lassim.</td>\n",
       "      <td>{'gitlab': ['gitlab.com/Gustafsson-lab/lassim.']}</td>\n",
       "      <td>PLoS computational biology</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28961954</th>\n",
       "      <td>https://github.com/ronandaly/pimp</td>\n",
       "      <td>{'github': ['github.com/RonanDaly/pimp']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29069501</th>\n",
       "      <td>https://jmorp.megabank.tohoku.ac.jp.</td>\n",
       "      <td>{}</td>\n",
       "      <td>Nucleic acids research</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29186355</th>\n",
       "      <td>https://github.com/mehr-een/bemkl-rbps.</td>\n",
       "      <td>{'github': ['github.com/mehr-een/bemkl-rbps.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29547932</th>\n",
       "      <td>https://github.com/cbg-ethz/netics.</td>\n",
       "      <td>{'github': ['github.com/cbg-ethz/netics.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29931190</th>\n",
       "      <td>https://github.com/mkanai/grimon</td>\n",
       "      <td>{'github': ['github.com/mkanai/grimon']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30202885</th>\n",
       "      <td>https://github.com/xuesidong/tobmi.</td>\n",
       "      <td>{'github': ['github.com/XuesiDong/TOBMI.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30556956</th>\n",
       "      <td>https://www.omicsnet.ca.</td>\n",
       "      <td>{}</td>\n",
       "      <td>Current protocols in bioinformatics</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30634897</th>\n",
       "      <td>https://sourceforge.net/projects/ms-helios/</td>\n",
       "      <td>{'sourceforge': ['sourceforge.net/projects/ms-helios/']}</td>\n",
       "      <td>BMC bioinformatics</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30768150</th>\n",
       "      <td>https://github.com/roosevelt-pku/drugcombinationprediction.</td>\n",
       "      <td>{'github': ['github.com/Roosevelt-PKU/drugcombinationprediction.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31092193</th>\n",
       "      <td>https://pathme.scai.fraunhofer.de/</td>\n",
       "      <td>{}</td>\n",
       "      <td>BMC bioinformatics</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31406990</th>\n",
       "      <td>https://github.com/vgteam/vg,</td>\n",
       "      <td>{'github': ['github.com/vgteam/vg,', 'github.com/jltsiren/gbwt', 'github.com/jltsiren/gcsa2.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31583639</th>\n",
       "      <td>https://keypathwayminer.compbio.sdu.dk</td>\n",
       "      <td>{}</td>\n",
       "      <td>Methods in molecular biology (Clifton, N.J.)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31642488</th>\n",
       "      <td>https://ccsm.uth.edu/exonskipdb/,</td>\n",
       "      <td>{}</td>\n",
       "      <td>Nucleic acids research</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31649722</th>\n",
       "      <td>https://targetmine.mizuguchilab.org.</td>\n",
       "      <td>{'github': ['github.com/chenyian-nibio/targetmine-gradle.']}</td>\n",
       "      <td>Frontiers in genetics</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31693075</th>\n",
       "      <td>https://github.com/yaluwen/omicpred.</td>\n",
       "      <td>{'github': ['github.com/YaluWen/OmicPred.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31913435</th>\n",
       "      <td>https://github.com/yaluwen/uomic.</td>\n",
       "      <td>{'github': ['github.com/YaluWen/Uomic.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32396365</th>\n",
       "      <td>https://zenodo.org/</td>\n",
       "      <td>{}</td>\n",
       "      <td>Journal of proteome research</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32657382</th>\n",
       "      <td>https://github.com/caokai1073/unioncom.</td>\n",
       "      <td>{'github': ['github.com/caokai1073/UnionCom.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32692809</th>\n",
       "      <td>https://github.com/cheminfo/compass.</td>\n",
       "      <td>{'github': ['github.com/cheminfo/COMPASS.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                match  \\\n",
       "uid                                                                     \n",
       "26377073                             https://github.com/yangzi4/inmf.   \n",
       "26961692                         https://github.com/maxconway/snftool   \n",
       "28082455                       https://github.com/olganikolova/gbgfa.   \n",
       "28640810                    https://gitlab.com/gustafsson-lab/lassim.   \n",
       "28961954                            https://github.com/ronandaly/pimp   \n",
       "29069501                         https://jmorp.megabank.tohoku.ac.jp.   \n",
       "29186355                      https://github.com/mehr-een/bemkl-rbps.   \n",
       "29547932                          https://github.com/cbg-ethz/netics.   \n",
       "29931190                             https://github.com/mkanai/grimon   \n",
       "30202885                          https://github.com/xuesidong/tobmi.   \n",
       "30556956                                     https://www.omicsnet.ca.   \n",
       "30634897                  https://sourceforge.net/projects/ms-helios/   \n",
       "30768150  https://github.com/roosevelt-pku/drugcombinationprediction.   \n",
       "31092193                           https://pathme.scai.fraunhofer.de/   \n",
       "31406990                                https://github.com/vgteam/vg,   \n",
       "31583639                       https://keypathwayminer.compbio.sdu.dk   \n",
       "31642488                            https://ccsm.uth.edu/exonskipdb/,   \n",
       "31649722                         https://targetmine.mizuguchilab.org.   \n",
       "31693075                         https://github.com/yaluwen/omicpred.   \n",
       "31913435                            https://github.com/yaluwen/uomic.   \n",
       "32396365                                          https://zenodo.org/   \n",
       "32657382                      https://github.com/caokai1073/unioncom.   \n",
       "32692809                         https://github.com/cheminfo/compass.   \n",
       "\n",
       "                                                                                      detected_code_links  \\\n",
       "uid                                                                                                         \n",
       "26377073                                                         {'github': ['github.com/yangzi4/iNMF.']}   \n",
       "26961692                                                     {'github': ['github.com/maxconway/SNFtool']}   \n",
       "28082455                                                   {'github': ['github.com/olganikolova/gbgfa.']}   \n",
       "28640810                                                {'gitlab': ['gitlab.com/Gustafsson-lab/lassim.']}   \n",
       "28961954                                                        {'github': ['github.com/RonanDaly/pimp']}   \n",
       "29069501                                                                                               {}   \n",
       "29186355                                                  {'github': ['github.com/mehr-een/bemkl-rbps.']}   \n",
       "29547932                                                      {'github': ['github.com/cbg-ethz/netics.']}   \n",
       "29931190                                                         {'github': ['github.com/mkanai/grimon']}   \n",
       "30202885                                                      {'github': ['github.com/XuesiDong/TOBMI.']}   \n",
       "30556956                                                                                               {}   \n",
       "30634897                                         {'sourceforge': ['sourceforge.net/projects/ms-helios/']}   \n",
       "30768150                              {'github': ['github.com/Roosevelt-PKU/drugcombinationprediction.']}   \n",
       "31092193                                                                                               {}   \n",
       "31406990  {'github': ['github.com/vgteam/vg,', 'github.com/jltsiren/gbwt', 'github.com/jltsiren/gcsa2.']}   \n",
       "31583639                                                                                               {}   \n",
       "31642488                                                                                               {}   \n",
       "31649722                                     {'github': ['github.com/chenyian-nibio/targetmine-gradle.']}   \n",
       "31693075                                                     {'github': ['github.com/YaluWen/OmicPred.']}   \n",
       "31913435                                                        {'github': ['github.com/YaluWen/Uomic.']}   \n",
       "32396365                                                                                               {}   \n",
       "32657382                                                  {'github': ['github.com/caokai1073/UnionCom.']}   \n",
       "32692809                                                     {'github': ['github.com/cheminfo/COMPASS.']}   \n",
       "\n",
       "                                               journal  \n",
       "uid                                                     \n",
       "26377073              Bioinformatics (Oxford, England)  \n",
       "26961692                            BMC bioinformatics  \n",
       "28082455              Bioinformatics (Oxford, England)  \n",
       "28640810                    PLoS computational biology  \n",
       "28961954              Bioinformatics (Oxford, England)  \n",
       "29069501                        Nucleic acids research  \n",
       "29186355              Bioinformatics (Oxford, England)  \n",
       "29547932              Bioinformatics (Oxford, England)  \n",
       "29931190              Bioinformatics (Oxford, England)  \n",
       "30202885              Bioinformatics (Oxford, England)  \n",
       "30556956           Current protocols in bioinformatics  \n",
       "30634897                            BMC bioinformatics  \n",
       "30768150              Bioinformatics (Oxford, England)  \n",
       "31092193                            BMC bioinformatics  \n",
       "31406990              Bioinformatics (Oxford, England)  \n",
       "31583639  Methods in molecular biology (Clifton, N.J.)  \n",
       "31642488                        Nucleic acids research  \n",
       "31649722                         Frontiers in genetics  \n",
       "31693075              Bioinformatics (Oxford, England)  \n",
       "31913435              Bioinformatics (Oxford, England)  \n",
       "32396365                  Journal of proteome research  \n",
       "32657382              Bioinformatics (Oxford, England)  \n",
       "32692809              Bioinformatics (Oxford, England)  "
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Raw string: `\\s` is the regex whitespace class, not a Python string escape.\n",
    "get_statments(r'available at (https.*?)\\s')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Bioinformatics (Oxford)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>match</th>\n",
       "      <th>detected_code_links</th>\n",
       "      <th>journal</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>uid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>25527095</th>\n",
       "      <td>the netgestalt crc portal can be freely accessed at http://www.netgestalt.org.</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26377073</th>\n",
       "      <td>the source code repository is publicly available at https://github.com/yangzi4/inmf.</td>\n",
       "      <td>{'github': ['github.com/yangzi4/iNMF.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26568631</th>\n",
       "      <td>chronos is freely available at http://biosignal.med.upatras.gr/chronos/</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26787660</th>\n",
       "      <td>genetrail2 can be freely accessed under https://genetrail2.bioinf.uni-sb.de</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26883487</th>\n",
       "      <td>the source code, required data for prediction, and demo data for test are freely available at: h...</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27797774</th>\n",
       "      <td>http://metagenomics.atc.tcs.com/webigloo and http://121.241.184.233/webigloo [freely available f...</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28082455</th>\n",
       "      <td>: the code for this work is available at https://github.com/olganikolova/gbgfa.</td>\n",
       "      <td>{'github': ['github.com/olganikolova/gbgfa.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28334215</th>\n",
       "      <td>matlab codes for flux balance analysis in this study are available in supplementary material.</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28407042</th>\n",
       "      <td>the source code is at https://github.com/zhangxf-ccnu/pdna.</td>\n",
       "      <td>{'github': ['github.com/Zhangxf-ccnu/pDNA.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28520848</th>\n",
       "      <td>pfa has been implemented as a matlab package, which is available at http://www.sysbio.ac.cn/cb/c...</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28605519</th>\n",
       "      <td>the package is implemented in r and available under gpl-2 license from the bioconductor website ...</td>\n",
       "      <td>{'bioconductor': ['CancerSubtypes/).']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28961954</th>\n",
       "      <td>pimp is available at http://polyomics.mvls.gla.ac.uk, and access is freely available on request....</td>\n",
       "      <td>{'github': ['github.com/RonanDaly/pimp']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29069296</th>\n",
       "      <td>https://kuppal.shinyapps.io/xmwas (online) and https://github.com/kuppal2/xmwas/ (r).</td>\n",
       "      <td>{'github': ['github.com/kuppal2/xMWAS/']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29077792</th>\n",
       "      <td>proposed methods are available in the r package mixkernel, released on cran. it is fully compati...</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29186328</th>\n",
       "      <td>an r package metapca is available online. (http://tsenglab.biostat.pitt.edu/software.htm).</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29186355</th>\n",
       "      <td>processed datasets, r as well as matlab implementations of the methods are available at https://...</td>\n",
       "      <td>{'github': ['github.com/mehr-een/bemkl-rbps.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29547932</th>\n",
       "      <td>netics is available at https://github.com/cbg-ethz/netics.</td>\n",
       "      <td>{'github': ['github.com/cbg-ethz/netics.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29590301</th>\n",
       "      <td>genease can be accessed freely at http://research.cchmc.org/mershalab/genease/login.html.</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29912365</th>\n",
       "      <td>software and vignettes are available at http://github.com/kevinvervier/tisan.</td>\n",
       "      <td>{'github': ['github.com/kevinVervier/TiSAn.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29931190</th>\n",
       "      <td>grimon is freely available at https://github.com/mkanai/grimon as an r package with example omic...</td>\n",
       "      <td>{'github': ['github.com/mkanai/grimon']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29939219</th>\n",
       "      <td>our r source code is available online at https://github.com/angy89/robustclusteringpatientsubtyp...</td>\n",
       "      <td>{'github': ['github.com/angy89/RobustClusteringPatientSubtyping.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30059974</th>\n",
       "      <td>discoversl package with user manual and sample workflow is available for download from github ur...</td>\n",
       "      <td>{'github': ['github.com/shaoli86/DiscoverSL/releases/tag/V1.0']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30184058</th>\n",
       "      <td>freely available on the github at https://github.com/chpgenetics/fbm.</td>\n",
       "      <td>{'github': ['github.com/CHPGenetics/FBM.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30202885</th>\n",
       "      <td>tobmiknn is freely available at https://github.com/xuesidong/tobmi.</td>\n",
       "      <td>{'github': ['github.com/XuesiDong/TOBMI.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30239606</th>\n",
       "      <td>we have implemented the proposed methods in an r package freely available at http://www.github.c...</td>\n",
       "      <td>{'github': ['github.com/baolinwu/MSKAT.'], '.git': ['http://www.github.com/baolinwu/MSKAT.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30295701</th>\n",
       "      <td>https://folk.uio.no/hadift/miv/ [user/pass via hadift@medisin. uio.no].</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30329022</th>\n",
       "      <td>https://github.com/michaelsharpnack/grassmanncluster.</td>\n",
       "      <td>{'github': ['github.com/michaelsharpnack/GrassmannCluster.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30358822</th>\n",
       "      <td>modreg is freely accessible at http://cis.hku.hk/software/modreg.tar.gz.</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30423084</th>\n",
       "      <td>an implementation of our methodology is available in the r package itop on cran. additionally, a...</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30561547</th>\n",
       "      <td>datasets and scripts for reproduction of results are available through: https://nalab.stanford.e...</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30596886</th>\n",
       "      <td>r package at the b2slab site: http://b2slab.upc.edu/software-and-tutorials/ and python package: ...</td>\n",
       "      <td>{'pypi': ['multiview.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30657866</th>\n",
       "      <td>diablo is implemented in the mixomics r bioconductor package with functions for parameters' choi...</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30668675</th>\n",
       "      <td>the reprogenomics viewer resource is freely accessible at http://rgv.genouest.org. the website i...</td>\n",
       "      <td>{'github': ['github.com/fchalmel/RGV.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30698637</th>\n",
       "      <td>code for nemo and for reproducing all nemo results in this paper is in github: https://github.co...</td>\n",
       "      <td>{'github': ['github.com/Shamir-Lab/NEMO.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30768150</th>\n",
       "      <td>drugcomboexplorer is available at https://github.com/roosevelt-pku/drugcombinationprediction.</td>\n",
       "      <td>{'github': ['github.com/Roosevelt-PKU/drugcombinationprediction.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30825303</th>\n",
       "      <td>prediction results and a reference implementation of dgmdl in python is available on https://git...</td>\n",
       "      <td>{'github': ['github.com/luoping1004/dgMDL.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30863842</th>\n",
       "      <td>the r package absnf is freely available for downloading from https://github.com/pfruan/absnf.</td>\n",
       "      <td>{'github': ['github.com/pfruan/abSNF.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30957844</th>\n",
       "      <td>the smccnet algorithm is written in r, and is freely available on the web at https://cran.r-proj...</td>\n",
       "      <td>{'cran': ['SmCCNet/index.html.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31141125</th>\n",
       "      <td>cnet can be downloaded at https://github.com/bsml320/cnet.</td>\n",
       "      <td>{'github': ['github.com/bsml320/CNet.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31406990</th>\n",
       "      <td>our software is available at https://github.com/vgteam/vg, https://github.com/jltsiren/gbwt and ...</td>\n",
       "      <td>{'github': ['github.com/vgteam/vg,', 'github.com/jltsiren/gbwt', 'github.com/jltsiren/gcsa2.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31501851</th>\n",
       "      <td>spectrum is available as an r software package from cran https://cran.r-project.org/web/packages...</td>\n",
       "      <td>{'cran': ['Spectrum/index.html.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31504182</th>\n",
       "      <td>the r package 'mnd' is available at url: https://www.itb.cnr.it/mnd.</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31504184</th>\n",
       "      <td>the lucid method is implemented through the lucidus r package available on cran (https://cran.r-...</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31510700</th>\n",
       "      <td>https://github.com/hosseinshn/moli.</td>\n",
       "      <td>{'github': ['github.com/hosseinshn/MOLI.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31693075</th>\n",
       "      <td>the r-package is available at https://github.com/yaluwen/omicpred.</td>\n",
       "      <td>{'github': ['github.com/YaluWen/OmicPred.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31693090</th>\n",
       "      <td>code can be downloaded from https://github.com/zoesgithub/mtbnn.</td>\n",
       "      <td>{'github': ['github.com/Zoesgithub/MtBNN.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31873721</th>\n",
       "      <td>the raw files of metagenomics, metabolomics, and transcriptomics data can be accessed at ncbi sr...</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31913435</th>\n",
       "      <td>the r-package is available at https://github.com/yaluwen/uomic.</td>\n",
       "      <td>{'github': ['github.com/YaluWen/Uomic.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31960894</th>\n",
       "      <td>the sismonr package is implemented in r and julia and is publicly available on the cran reposito...</td>\n",
       "      <td>{'.git': ['https://oliviaab.github.io/sismonr/.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32176258</th>\n",
       "      <td>m2ia is public available at http://m2ia.met-bioinformatics.cn.</td>\n",
       "      <td>{}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32657371</th>\n",
       "      <td>https://github.com/hosseinshn/aitl.</td>\n",
       "      <td>{'github': ['github.com/hosseinshn/AITL.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32657382</th>\n",
       "      <td>unioncom software is available at https://github.com/caokai1073/unioncom.</td>\n",
       "      <td>{'github': ['github.com/caokai1073/UnionCom.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32692809</th>\n",
       "      <td>source code is available at https://github.com/cheminfo/compass.</td>\n",
       "      <td>{'github': ['github.com/cheminfo/COMPASS.']}</td>\n",
       "      <td>Bioinformatics (Oxford, England)</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                                                        match  \\\n",
       "uid                                                                                                             \n",
       "25527095                       the netgestalt crc portal can be freely accessed at http://www.netgestalt.org.   \n",
       "26377073                 the source code repository is publicly available at https://github.com/yangzi4/inmf.   \n",
       "26568631                              chronos is freely available at http://biosignal.med.upatras.gr/chronos/   \n",
       "26787660                          genetrail2 can be freely accessed under https://genetrail2.bioinf.uni-sb.de   \n",
       "26883487  the source code, required data for prediction, and demo data for test are freely available at: h...   \n",
       "27797774  http://metagenomics.atc.tcs.com/webigloo and http://121.241.184.233/webigloo [freely available f...   \n",
       "28082455                      : the code for this work is available at https://github.com/olganikolova/gbgfa.   \n",
       "28334215        matlab codes for flux balance analysis in this study are available in supplementary material.   \n",
       "28407042                                          the source code is at https://github.com/zhangxf-ccnu/pdna.   \n",
       "28520848  pfa has been implemented as a matlab package, which is available at http://www.sysbio.ac.cn/cb/c...   \n",
       "28605519  the package is implemented in r and available under gpl-2 license from the bioconductor website ...   \n",
       "28961954  pimp is available at http://polyomics.mvls.gla.ac.uk, and access is freely available on request....   \n",
       "29069296                https://kuppal.shinyapps.io/xmwas (online) and https://github.com/kuppal2/xmwas/ (r).   \n",
       "29077792  proposed methods are available in the r package mixkernel, released on cran. it is fully compati...   \n",
       "29186328           an r package metapca is available online. (http://tsenglab.biostat.pitt.edu/software.htm).   \n",
       "29186355  processed datasets, r as well as matlab implementations of the methods are available at https://...   \n",
       "29547932                                           netics is available at https://github.com/cbg-ethz/netics.   \n",
       "29590301            genease can be accessed freely at http://research.cchmc.org/mershalab/genease/login.html.   \n",
       "29912365                        software and vignettes are available at http://github.com/kevinvervier/tisan.   \n",
       "29931190  grimon is freely available at https://github.com/mkanai/grimon as an r package with example omic...   \n",
       "29939219  our r source code is available online at https://github.com/angy89/robustclusteringpatientsubtyp...   \n",
       "30059974  discoversl package with user manual and sample workflow is available for download from github ur...   \n",
       "30184058                                freely available on the github at https://github.com/chpgenetics/fbm.   \n",
       "30202885                                  tobmiknn is freely available at https://github.com/xuesidong/tobmi.   \n",
       "30239606  we have implemented the proposed methods in an r package freely available at http://www.github.c...   \n",
       "30295701                              https://folk.uio.no/hadift/miv/ [user/pass via hadift@medisin. uio.no].   \n",
       "30329022                                                https://github.com/michaelsharpnack/grassmanncluster.   \n",
       "30358822                             modreg is freely accessible at http://cis.hku.hk/software/modreg.tar.gz.   \n",
       "30423084  an implementation of our methodology is available in the r package itop on cran. additionally, a...   \n",
       "30561547  datasets and scripts for reproduction of results are available through: https://nalab.stanford.e...   \n",
       "30596886  r package at the b2slab site: http://b2slab.upc.edu/software-and-tutorials/ and python package: ...   \n",
       "30657866  diablo is implemented in the mixomics r bioconductor package with functions for parameters' choi...   \n",
       "30668675  the reprogenomics viewer resource is freely accessible at http://rgv.genouest.org. the website i...   \n",
       "30698637  code for nemo and for reproducing all nemo results in this paper is in github: https://github.co...   \n",
       "30768150        drugcomboexplorer is available at https://github.com/roosevelt-pku/drugcombinationprediction.   \n",
       "30825303  prediction results and a reference implementation of dgmdl in python is available on https://git...   \n",
       "30863842        the r package absnf is freely available for downloading from https://github.com/pfruan/absnf.   \n",
       "30957844  the smccnet algorithm is written in r, and is freely available on the web at https://cran.r-proj...   \n",
       "31141125                                           cnet can be downloaded at https://github.com/bsml320/cnet.   \n",
       "31406990  our software is available at https://github.com/vgteam/vg, https://github.com/jltsiren/gbwt and ...   \n",
       "31501851  spectrum is available as an r software package from cran https://cran.r-project.org/web/packages...   \n",
       "31504182                                 the r package 'mnd' is available at url: https://www.itb.cnr.it/mnd.   \n",
       "31504184  the lucid method is implemented through the lucidus r package available on cran (https://cran.r-...   \n",
       "31510700                                                                  https://github.com/hosseinshn/moli.   \n",
       "31693075                                   the r-package is available at https://github.com/yaluwen/omicpred.   \n",
       "31693090                                     code can be downloaded from https://github.com/zoesgithub/mtbnn.   \n",
       "31873721  the raw files of metagenomics, metabolomics, and transcriptomics data can be accessed at ncbi sr...   \n",
       "31913435                                      the r-package is available at https://github.com/yaluwen/uomic.   \n",
       "31960894  the sismonr package is implemented in r and julia and is publicly available on the cran reposito...   \n",
       "32176258                                       m2ia is public available at http://m2ia.met-bioinformatics.cn.   \n",
       "32657371                                                                  https://github.com/hosseinshn/aitl.   \n",
       "32657382                            unioncom software is available at https://github.com/caokai1073/unioncom.   \n",
       "32692809                                     source code is available at https://github.com/cheminfo/compass.   \n",
       "\n",
       "                                                                                      detected_code_links  \\\n",
       "uid                                                                                                         \n",
       "25527095                                                                                               {}   \n",
       "26377073                                                         {'github': ['github.com/yangzi4/iNMF.']}   \n",
       "26568631                                                                                               {}   \n",
       "26787660                                                                                               {}   \n",
       "26883487                                                                                               {}   \n",
       "27797774                                                                                               {}   \n",
       "28082455                                                   {'github': ['github.com/olganikolova/gbgfa.']}   \n",
       "28334215                                                                                               {}   \n",
       "28407042                                                    {'github': ['github.com/Zhangxf-ccnu/pDNA.']}   \n",
       "28520848                                                                                               {}   \n",
       "28605519                                                          {'bioconductor': ['CancerSubtypes/).']}   \n",
       "28961954                                                        {'github': ['github.com/RonanDaly/pimp']}   \n",
       "29069296                                                        {'github': ['github.com/kuppal2/xMWAS/']}   \n",
       "29077792                                                                                               {}   \n",
       "29186328                                                                                               {}   \n",
       "29186355                                                  {'github': ['github.com/mehr-een/bemkl-rbps.']}   \n",
       "29547932                                                      {'github': ['github.com/cbg-ethz/netics.']}   \n",
       "29590301                                                                                               {}   \n",
       "29912365                                                   {'github': ['github.com/kevinVervier/TiSAn.']}   \n",
       "29931190                                                         {'github': ['github.com/mkanai/grimon']}   \n",
       "29939219                              {'github': ['github.com/angy89/RobustClusteringPatientSubtyping.']}   \n",
       "30059974                                 {'github': ['github.com/shaoli86/DiscoverSL/releases/tag/V1.0']}   \n",
       "30184058                                                      {'github': ['github.com/CHPGenetics/FBM.']}   \n",
       "30202885                                                      {'github': ['github.com/XuesiDong/TOBMI.']}   \n",
       "30239606    {'github': ['github.com/baolinwu/MSKAT.'], '.git': ['http://www.github.com/baolinwu/MSKAT.']}   \n",
       "30295701                                                                                               {}   \n",
       "30329022                                    {'github': ['github.com/michaelsharpnack/GrassmannCluster.']}   \n",
       "30358822                                                                                               {}   \n",
       "30423084                                                                                               {}   \n",
       "30561547                                                                                               {}   \n",
       "30596886                                                                         {'pypi': ['multiview.']}   \n",
       "30657866                                                                                               {}   \n",
       "30668675                                                         {'github': ['github.com/fchalmel/RGV.']}   \n",
       "30698637                                                      {'github': ['github.com/Shamir-Lab/NEMO.']}   \n",
       "30768150                              {'github': ['github.com/Roosevelt-PKU/drugcombinationprediction.']}   \n",
       "30825303                                                    {'github': ['github.com/luoping1004/dgMDL.']}   \n",
       "30863842                                                         {'github': ['github.com/pfruan/abSNF.']}   \n",
       "30957844                                                                {'cran': ['SmCCNet/index.html.']}   \n",
       "31141125                                                         {'github': ['github.com/bsml320/CNet.']}   \n",
       "31406990  {'github': ['github.com/vgteam/vg,', 'github.com/jltsiren/gbwt', 'github.com/jltsiren/gcsa2.']}   \n",
       "31501851                                                               {'cran': ['Spectrum/index.html.']}   \n",
       "31504182                                                                                               {}   \n",
       "31504184                                                                                               {}   \n",
       "31510700                                                      {'github': ['github.com/hosseinshn/MOLI.']}   \n",
       "31693075                                                     {'github': ['github.com/YaluWen/OmicPred.']}   \n",
       "31693090                                                     {'github': ['github.com/Zoesgithub/MtBNN.']}   \n",
       "31873721                                                                                               {}   \n",
       "31913435                                                        {'github': ['github.com/YaluWen/Uomic.']}   \n",
       "31960894                                               {'.git': ['https://oliviaab.github.io/sismonr/.']}   \n",
       "32176258                                                                                               {}   \n",
       "32657371                                                      {'github': ['github.com/hosseinshn/AITL.']}   \n",
       "32657382                                                  {'github': ['github.com/caokai1073/UnionCom.']}   \n",
       "32692809                                                     {'github': ['github.com/cheminfo/COMPASS.']}   \n",
       "\n",
       "                                   journal  \n",
       "uid                                         \n",
       "25527095  Bioinformatics (Oxford, England)  \n",
       "26377073  Bioinformatics (Oxford, England)  \n",
       "26568631  Bioinformatics (Oxford, England)  \n",
       "26787660  Bioinformatics (Oxford, England)  \n",
       "26883487  Bioinformatics (Oxford, England)  \n",
       "27797774  Bioinformatics (Oxford, England)  \n",
       "28082455  Bioinformatics (Oxford, England)  \n",
       "28334215  Bioinformatics (Oxford, England)  \n",
       "28407042  Bioinformatics (Oxford, England)  \n",
       "28520848  Bioinformatics (Oxford, England)  \n",
       "28605519  Bioinformatics (Oxford, England)  \n",
       "28961954  Bioinformatics (Oxford, England)  \n",
       "29069296  Bioinformatics (Oxford, England)  \n",
       "29077792  Bioinformatics (Oxford, England)  \n",
       "29186328  Bioinformatics (Oxford, England)  \n",
       "29186355  Bioinformatics (Oxford, England)  \n",
       "29547932  Bioinformatics (Oxford, England)  \n",
       "29590301  Bioinformatics (Oxford, England)  \n",
       "29912365  Bioinformatics (Oxford, England)  \n",
       "29931190  Bioinformatics (Oxford, England)  \n",
       "29939219  Bioinformatics (Oxford, England)  \n",
       "30059974  Bioinformatics (Oxford, England)  \n",
       "30184058  Bioinformatics (Oxford, England)  \n",
       "30202885  Bioinformatics (Oxford, England)  \n",
       "30239606  Bioinformatics (Oxford, England)  \n",
       "30295701  Bioinformatics (Oxford, England)  \n",
       "30329022  Bioinformatics (Oxford, England)  \n",
       "30358822  Bioinformatics (Oxford, England)  \n",
       "30423084  Bioinformatics (Oxford, England)  \n",
       "30561547  Bioinformatics (Oxford, England)  \n",
       "30596886  Bioinformatics (Oxford, England)  \n",
       "30657866  Bioinformatics (Oxford, England)  \n",
       "30668675  Bioinformatics (Oxford, England)  \n",
       "30698637  Bioinformatics (Oxford, England)  \n",
       "30768150  Bioinformatics (Oxford, England)  \n",
       "30825303  Bioinformatics (Oxford, England)  \n",
       "30863842  Bioinformatics (Oxford, England)  \n",
       "30957844  Bioinformatics (Oxford, England)  \n",
       "31141125  Bioinformatics (Oxford, England)  \n",
       "31406990  Bioinformatics (Oxford, England)  \n",
       "31501851  Bioinformatics (Oxford, England)  \n",
       "31504182  Bioinformatics (Oxford, England)  \n",
       "31504184  Bioinformatics (Oxford, England)  \n",
       "31510700  Bioinformatics (Oxford, England)  \n",
       "31693075  Bioinformatics (Oxford, England)  \n",
       "31693090  Bioinformatics (Oxford, England)  \n",
       "31873721  Bioinformatics (Oxford, England)  \n",
       "31913435  Bioinformatics (Oxford, England)  \n",
       "31960894  Bioinformatics (Oxford, England)  \n",
       "32176258  Bioinformatics (Oxford, England)  \n",
       "32657371  Bioinformatics (Oxford, England)  \n",
       "32657382  Bioinformatics (Oxford, England)  \n",
       "32692809  Bioinformatics (Oxford, England)  "
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "header = 'availability and implementation'\n",
    "\n",
    "get_statments(f'{escape(header)}\\n(.*?)\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Store the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "Stored `code_repositories` (C557967A → 5FF4AA2D) at Saturday, 25. Jul 2020 15:56"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {
      "text/markdown": {
       "action": "store",
       "command": "store code_repositories in pubmed_derived_data",
       "finished": "2020-07-25T15:56:26.412470",
       "finished_human_readable": "Saturday, 25. Jul 2020 15:56",
       "result": [
        {
         "new_file": {
          "crc32": "5FF4AA2D",
          "sha256": "92B28FE95EA205C4311BD4E9D6360D87087D0C5D452CCF9567829CFFD27EE1E5"
         },
         "old_file": {
          "crc32": "C557967A",
          "sha256": "53E92545F8C164C57DD36D4DD7B4823D960B0C576524740753A902E59D8E438C"
         },
         "subject": "code_repositories"
        }
       ],
       "started": "2020-07-25T15:56:25.425898"
      }
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "%vault store code_repositories in pubmed_derived_data"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}