1814 lines (1813 with data), 89.6 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%run notebook_setup.ipynb"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"Imported `literature` (904B0F94) at Saturday, 25. Jul 2020 15:56"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {
"text/markdown": {
"action": "import",
"command": "from pubmed_derived_data import literature",
"finished": "2020-07-25T15:56:17.417838",
"finished_human_readable": "Saturday, 25. Jul 2020 15:56",
"result": [
{
"new_file": {
"crc32": "904B0F94",
"sha256": "A2EFC068A287A3B724AE4B320EE5356E1E99474BD08A2E2A3EBA34CD0194F23B"
},
"subject": "literature"
}
],
"started": "2020-07-25T15:56:15.499885"
}
},
"output_type": "display_data"
}
],
"source": [
"%vault from pubmed_derived_data import literature"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"code_repositories = literature.index.to_frame().drop(columns='uid').copy()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Code archives and repositories extraction"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from re import escape\n",
"from pandas import Series\n",
"\n",
"from repository_detection import (\n",
" source_code_platforms, mixed_publication_platforms, data_only_platforms,\n",
" all_platforms as platforms\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'github': '(github\\\\.com/\\\\S+/\\\\S+)',\n",
" 'gitlab': '(gitlab\\\\.com/\\\\S+/\\\\S+)',\n",
" 'sourceforge': '(sourceforge\\\\.net/\\\\S+)',\n",
" 'bitbucket': '(bitbucket\\\\.org/\\\\S+)',\n",
" '.git': '(\\\\S+:\\\\S+\\\\.git\\\\S*)',\n",
" 'cran': 'cran\\\\.r-project\\\\.org/(?:web/packages/|package=)(\\\\S+)(?:/\\\\b|/$|\\\\s|\\\\.$|\\\\)|$)',\n",
" 'pypi': 'pypi\\\\.python\\\\.org/pypi/(\\\\S+)(?:/\\\\b|/$|\\\\s|\\\\.$|\\\\)|$)'}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"source_code_platforms"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'zenodo': 'doi\\\\.org/10.5281/(zenodo\\\\.\\\\d+?)(?:/\\\\b|/$|\\\\s|\\\\.$|\\\\)|$)',\n",
" 'bioconductor': 'bioconductor.org/packages/(\\\\S+)(?:/\\\\b|/$|\\\\s|\\\\.$|\\\\)|$)',\n",
" 'osf': 'osf.io/(\\\\S+){slash_or_end}'}"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mixed_publication_platforms"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'dryad': 'datadryad.org/(\\\\S+)(?:/\\\\b|/$|\\\\s|\\\\.$|\\\\)|$)'}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_only_platforms"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def collapse_lists(lists):\n",
"    \"\"\"Flatten an iterable of lists into one sorted list of unique items.\"\"\"\n",
"    from itertools import chain\n",
"\n",
"    # chain avoids the quadratic list re-copying of `sum(lists, [])`\n",
"    return sorted(set(chain.from_iterable(lists)))\n",
"\n",
"\n",
"fields = Series(['abstract', 'full_text'])\n",
"\n",
"# NOTE(review): the 'osf' pattern displayed earlier still contains a literal\n",
"# '{slash_or_end}' placeholder, which would explain the empty osf_matches;\n",
"# confirm in repository_detection.\n",
"for platform in platforms:\n",
"    for field in fields:\n",
"        field_text = literature[field]\n",
"        # plain substring search in the lower-cased text; missing texts count as \"no mention\"\n",
"        code_repositories[f'{field}_mentions_{platform}'] = (\n",
"            field_text.str.lower().str.contains(platform, regex=False, na=False)\n",
"        )\n",
"        matches = field_text.astype(str).str.findall(platforms[platform])\n",
"        code_repositories[f'{field}_{platform}_matches'] = matches\n",
"    # combine the per-field columns into per-paper columns for this platform\n",
"    code_repositories[f'mentions_{platform}'] = code_repositories[fields + f'_mentions_{platform}'].any(axis=1)\n",
"    code_repositories[f'{platform}_matches'] = code_repositories[fields + f'_{platform}_matches'].apply(collapse_lists, axis=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ext-link handling is not needed for abstracts:"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"assert not literature.abstract.str.contains('ext-link').any()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"abstract_mentions_github 54\n",
"abstract_mentions_gitlab 2\n",
"abstract_mentions_sourceforge 4\n",
"abstract_mentions_bitbucket 3\n",
"abstract_mentions_.git 2\n",
"abstract_mentions_cran 19\n",
"abstract_mentions_pypi 2\n",
"abstract_mentions_zenodo 1\n",
"abstract_mentions_bioconductor 16\n",
"abstract_mentions_osf 1\n",
"abstract_mentions_dryad 0\n",
"dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"code_repositories[[f'abstract_mentions_{platform}' for platform in platforms]].sum()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"full_text_mentions_github 205\n",
"full_text_mentions_gitlab 5\n",
"full_text_mentions_sourceforge 44\n",
"full_text_mentions_bitbucket 15\n",
"full_text_mentions_.git 54\n",
"full_text_mentions_cran 68\n",
"full_text_mentions_pypi 5\n",
"full_text_mentions_zenodo 13\n",
"full_text_mentions_bioconductor 106\n",
"full_text_mentions_osf 4\n",
"full_text_mentions_dryad 1\n",
"dtype: int64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"code_repositories[[f'full_text_mentions_{platform}' for platform in platforms]].sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The SourceForge mentions might refer to tools that were used, rather than to code shared by the authors."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"mentions_github 245\n",
"mentions_gitlab 5\n",
"mentions_sourceforge 46\n",
"mentions_bitbucket 17\n",
"mentions_.git 56\n",
"mentions_cran 82\n",
"mentions_pypi 7\n",
"mentions_zenodo 14\n",
"mentions_bioconductor 116\n",
"mentions_osf 5\n",
"mentions_dryad 1\n",
"dtype: int64"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"code_repositories[[f'mentions_{platform}' for platform in platforms]].sum()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"github_matches [github.com/Magdoll//ECE, github.com/wizardfan...\n",
"gitlab_matches [gitlab.com/Gustafsson-lab/lassim, gitlab.com/...\n",
"sourceforge_matches [sourceforge.net/., sourceforge.net/projects/a...\n",
"bitbucket_matches [bitbucket.org/hbc/galaxy-central-hbc, bitbuck...\n",
".git_matches [http://networkx.github.io/, http://broadinsti...\n",
"cran_matches [gplots/index.html, iSubpathwayMiner/, mixOmic...\n",
"pypi_matches [omics_pipe), MACS2, multiview.]\n",
"zenodo_matches [zenodo.35611, zenodo.546110, zenodo.1154124, ...\n",
"bioconductor_matches [release/data/annotation/html/hgu133plus2.db.h...\n",
"osf_matches []\n",
"dryad_matches []\n",
"dtype: object"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"code_repositories[[f'{platform}_matches' for platform in platforms]].sum()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"index\n",
"http://broadinstitute.github.io/picard/ 11\n",
"http://broadinstitute.github.io/picard 4\n",
"http://cole-trapnell-lab.github.io/cufflinks/ 3\n",
"https://broadinstitute.github.io/picard/ 3\n",
"http://zwdzwd.github.io/InfiniumAnnotation 2\n",
"https://trinotate.github.io 2\n",
"http://cole-trapnell-lab.github.io/cufflinks/cuffmerge/ 1\n",
"http://cole-trapnell-lab.github.io/cufflinks/releases/v2.2.1/ 1\n",
"http://cole-trapnell-lab.github.io/monocle-release/articles/v2.0.0 1\n",
"http://galaxyproject.github.io/training-material 1\n",
"http://kwanjeeraw.github.io/grinn/ 1\n",
"http://linsalrob.github.io/PyFBA/ 1\n",
"http://mzmine.github.io/ 1\n",
"http://networkx.github.io/ 1\n",
"http://picrust.github.com/picrust/ 1\n",
"http://sbgn.github.io/sbgn/ 1\n",
"http://topepo.github.io/caret/index.html 1\n",
"http://varadanlab.github.io/InFlo/ 1\n",
"http://www.github.com/baolinwu/MSKAT. 1\n",
"http://yjjang.github.io/mongkie 1\n",
"https://broadinstitute.github.io/picard 1\n",
"https://chun-weitung.gitbooks.io/chemdis/content/data-sources.html 1\n",
"https://chun-weitung.gitbooks.io/chemdis/content/web-api.html 1\n",
"https://funricegenes.github.io/ 1\n",
"https://galaxyproject.github.io/training-material/topics/proteomics/tutorials/metaproteomics/tutorial.html 1\n",
"https://galaxyproject.github.io/training-material/topics/proteomics/tutorials/proteogenomics-novel-peptide-analysis/tutorial.html 1\n",
"https://ginolhac.github.io/mapDamage/ 1\n",
"https://gist.github.com/yannabraham/c1f9de9b23fb94105ca5 1\n",
"https://github.com/TiphaineCMartin/multiomic_AITD.git 1\n",
"https://github.com/biopython/biopython.github.io/ 1\n",
"https://github.com/galaxyproteomics/mvpapplication-git.git 1\n",
"https://github.com/ucdavis-bioinformatics/sickle.git 1\n",
"https://github.com/xu1912/SMON.git 1\n",
"https://jcupitt.github.io/libvips/ 1\n",
"https://jheatmap.github.io/jheatmap/ 1\n",
"https://oliviaab.github.io/sismonr/. 1\n",
"https://opencobra.github.io/ 1\n",
"https://openseadragon.github.io/ 1\n",
"https://rrshieldscutler.github.io/splinectomeR/ 1\n",
"https://rstudio.github.io/DT/ 1\n",
"https://samtools.github.io/hts-specs/SAMv1.pdf 1\n",
"https://transdecoder.github.io 1\n",
"https://transdecoder.github.io/ 1\n",
"https://xjtu-funet-source.github.io/FuNet/FuNet.html 1\n",
"Name: 0, dtype: int64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Series(code_repositories['.git_matches'].sum()).sorted_value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Limitations\n",
"\n",
" - does not account for GitLab instances in custom domains"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Screen for code availability statements"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can compose a list of common code availability phrases and use it to see what other ways of sharing code are used."
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"index\n",
"Scientific reports 126\n",
"Omics : a journal of integrative biology 78\n",
"PloS one 69\n",
"Bioinformatics (Oxford, England) 68\n",
"Nature communications 58\n",
"Frontiers in genetics 55\n",
"Journal of proteomics 53\n",
"BMC bioinformatics 52\n",
"Nucleic acids research 45\n",
"Methods in molecular biology (Clifton, N.J.) 43\n",
"Name: journal, dtype: int64"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"journals_with_most_papers = literature['journal'].sorted_value_counts().head(20)\n",
"journals_with_most_papers.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"literature = literature.drop(columns=list(set(literature.columns) & set(code_repositories.columns)))"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"72"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"likely_to_contain_code = Series(list(source_code_platforms) + list(mixed_publication_platforms))\n",
"any_source_link = code_repositories['abstract_' + likely_to_contain_code + '_matches'].any(axis=1)\n",
"any_source_link.sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: this does not have to be their own source code - it can be the code of a tool they used!"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"literature['has_source_code_link'] = any_source_link"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>journal</th>\n",
" <th>papers_with_code_link</th>\n",
" <th>common_n_grams</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Scientific reports</td>\n",
" <td>0</td>\n",
" <td>[]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Omics : a journal of integrative biology</td>\n",
" <td>0</td>\n",
" <td>[]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>PloS one</td>\n",
" <td>0</td>\n",
" <td>[]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" <td>42</td>\n",
" <td>[availability and implementation, https github com, supplementary information supplementary data are available at bioinformatics online]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nature communications</td>\n",
" <td>0</td>\n",
" <td>[]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Frontiers in genetics</td>\n",
" <td>5</td>\n",
" <td>[available at https github com, co expression modules, deep learning based, dysfunctional subpathways we, is available at https, multi omics data, number of subtypes]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Journal of proteomics</td>\n",
" <td>0</td>\n",
" <td>[]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>BMC bioinformatics</td>\n",
" <td>6</td>\n",
" <td>[available at https, chronic lymphocytic leukaemia, is freely available]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Nucleic acids research</td>\n",
" <td>0</td>\n",
" <td>[]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Methods in molecular biology (Clifton, N.J.)</td>\n",
" <td>1</td>\n",
" <td>[]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Journal of proteome research</td>\n",
" <td>0</td>\n",
" <td>[]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>BMC genomics</td>\n",
" <td>2</td>\n",
" <td>[candidate synthetic lethal, loss of function, synthetic lethal gene pairs]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Oncotarget</td>\n",
" <td>0</td>\n",
" <td>[]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>International journal of molecular sciences</td>\n",
" <td>0</td>\n",
" <td>[]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Briefings in bioinformatics</td>\n",
" <td>3</td>\n",
" <td>[canonical correlation analysis]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>Frontiers in microbiology</td>\n",
" <td>0</td>\n",
" <td>[]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Molecular & cellular proteomics : MCP</td>\n",
" <td>1</td>\n",
" <td>[from multiple experiments, of gene sets]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>mSystems</td>\n",
" <td>0</td>\n",
" <td>[]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>Cell reports</td>\n",
" <td>0</td>\n",
" <td>[]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Metabolites</td>\n",
" <td>0</td>\n",
" <td>[]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" journal papers_with_code_link \\\n",
"0 Scientific reports 0 \n",
"1 Omics : a journal of integrative biology 0 \n",
"2 PloS one 0 \n",
"3 Bioinformatics (Oxford, England) 42 \n",
"4 Nature communications 0 \n",
"5 Frontiers in genetics 5 \n",
"6 Journal of proteomics 0 \n",
"7 BMC bioinformatics 6 \n",
"8 Nucleic acids research 0 \n",
"9 Methods in molecular biology (Clifton, N.J.) 1 \n",
"10 Journal of proteome research 0 \n",
"11 BMC genomics 2 \n",
"12 Oncotarget 0 \n",
"13 International journal of molecular sciences 0 \n",
"14 Briefings in bioinformatics 3 \n",
"15 Frontiers in microbiology 0 \n",
"16 Molecular & cellular proteomics : MCP 1 \n",
"17 mSystems 0 \n",
"18 Cell reports 0 \n",
"19 Metabolites 0 \n",
"\n",
" common_n_grams \n",
"0 [] \n",
"1 [] \n",
"2 [] \n",
"3 [availability and implementation, https github com, supplementary information supplementary data are available at bioinformatics online] \n",
"4 [] \n",
"5 [available at https github com, co expression modules, deep learning based, dysfunctional subpathways we, is available at https, multi omics data, number of subtypes] \n",
"6 [] \n",
"7 [available at https, chronic lymphocytic leukaemia, is freely available] \n",
"8 [] \n",
"9 [] \n",
"10 [] \n",
"11 [candidate synthetic lethal, loss of function, synthetic lethal gene pairs] \n",
"12 [] \n",
"13 [] \n",
"14 [canonical correlation analysis] \n",
"15 [] \n",
"16 [from multiple experiments, of gene sets] \n",
"17 [] \n",
"18 [] \n",
"19 [] "
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas\n",
"pandas.set_option('display.max_colwidth', 500)\n",
"from pandas import DataFrame\n",
"from helpers.n_grams import find_longest_common_n_grams\n",
"\n",
"\n",
"# For each of the journals with most papers, collect the n-grams shared by\n",
"# the abstracts of papers in which a source code link was detected.\n",
"statments = []\n",
"\n",
"for journal in journals_with_most_papers.index:\n",
"    relevant_papers = literature[(literature.journal == journal) & literature.has_source_code_link]\n",
"    # n-grams can only be searched for in papers which have an abstract\n",
"    relevant_papers_with_abstracts = relevant_papers[relevant_papers.abstract.notna()]\n",
"\n",
"    statments.append({\n",
"        'journal': journal,\n",
"        'papers_with_code_link': len(relevant_papers),\n",
"        'common_n_grams': find_longest_common_n_grams(\n",
"            data=relevant_papers_with_abstracts.abstract,\n",
"            min_words=3, max_words=10,\n",
"            min_count=3, min_frequency=0.5\n",
"        )\n",
"    })\n",
"\n",
"DataFrame(statments)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Only _Bioinformatics (Oxford, England)_, _Frontiers in genetics_ and _BMC bioinformatics_ had n-grams which looked like code availability statements."
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# one dict per paper: platform -> links detected in the abstract (platforms without links are skipped)\n",
"abstract_match_columns = 'abstract_' + Series(list(platforms)) + '_matches'\n",
"\n",
"compact_code_links_summary = (\n",
"    code_repositories[abstract_match_columns]\n",
"    .rename(columns=lambda name: name.replace('abstract_', '').replace('_matches', ''))\n",
"    .apply(lambda row: row[row.apply(len) != 0].to_dict(), axis=1)\n",
"    .to_frame('detected_code_links')\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"def get_statments(expression):\n",
"    \"\"\"Show what the abstracts contain in the capture group of `expression`.\n",
"\n",
"    Abstracts are lower-cased before matching, so `expression` has to be\n",
"    written in lower case. The first capture group is reported as `match`,\n",
"    next to the code links detected in the abstract and the journal name.\n",
"    Rows for which the expression did not match are dropped.\n",
"    \"\"\"\n",
"    return (\n",
"        literature['abstract'].str.lower()\n",
"        .str.extract(expression).dropna()\n",
"        .rename(columns={0: 'match'})\n",
"        .join(compact_code_links_summary)\n",
"        # only the journal is displayed, so there is no need to join the whole frame\n",
"        .join(literature[['journal']])\n",
"        [['match', 'detected_code_links', 'journal']]\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"pandas.set_option('display.max_colwidth', 100)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### BMC bioinformatics"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>match</th>\n",
" <th>detected_code_links</th>\n",
" <th>journal</th>\n",
" </tr>\n",
" <tr>\n",
" <th>uid</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>17468122</th>\n",
" <td>at http://integromics.kobic.re.kr/gazer/.</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19706745</th>\n",
" <td>from http://cran.r-project.org/ or from the web site companion (http://math.univ-toulouse.fr/bio...</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26568631</th>\n",
" <td>at http://biosignal.med.upatras.gr/chronos/</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27814671</th>\n",
" <td>as a bioconductor r package.</td>\n",
" <td>{}</td>\n",
" <td>BMC genomics</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28961954</th>\n",
" <td>on request. 50 gb of space is allocated for data storage, with unrestricted number of samples an...</td>\n",
" <td>{'github': ['github.com/RonanDaly/pimp']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29931190</th>\n",
" <td>at https://github.com/mkanai/grimon as an r package with example omics data sets.</td>\n",
" <td>{'github': ['github.com/mkanai/grimon']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30202885</th>\n",
" <td>at https://github.com/xuesidong/tobmi.</td>\n",
" <td>{'github': ['github.com/XuesiDong/TOBMI.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30598101</th>\n",
" <td>at http://bioinfo.au.tsinghua.edu.cn/jianglab/csnets/ .</td>\n",
" <td>{}</td>\n",
" <td>BMC genomics</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30863842</th>\n",
" <td>for downloading from https://github.com/pfruan/absnf.</td>\n",
" <td>{'github': ['github.com/pfruan/abSNF.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30957844</th>\n",
" <td>on the web at https://cran.r-project.org/web/packages/smccnet/index.html.</td>\n",
" <td>{'cran': ['SmCCNet/index.html.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" match \\\n",
"uid \n",
"17468122 at http://integromics.kobic.re.kr/gazer/. \n",
"19706745 from http://cran.r-project.org/ or from the web site companion (http://math.univ-toulouse.fr/bio... \n",
"26568631 at http://biosignal.med.upatras.gr/chronos/ \n",
"27814671 as a bioconductor r package. \n",
"28961954 on request. 50 gb of space is allocated for data storage, with unrestricted number of samples an... \n",
"29931190 at https://github.com/mkanai/grimon as an r package with example omics data sets. \n",
"30202885 at https://github.com/xuesidong/tobmi. \n",
"30598101 at http://bioinfo.au.tsinghua.edu.cn/jianglab/csnets/ . \n",
"30863842 for downloading from https://github.com/pfruan/absnf. \n",
"30957844 on the web at https://cran.r-project.org/web/packages/smccnet/index.html. \n",
"\n",
" detected_code_links \\\n",
"uid \n",
"17468122 {} \n",
"19706745 {} \n",
"26568631 {} \n",
"27814671 {} \n",
"28961954 {'github': ['github.com/RonanDaly/pimp']} \n",
"29931190 {'github': ['github.com/mkanai/grimon']} \n",
"30202885 {'github': ['github.com/XuesiDong/TOBMI.']} \n",
"30598101 {} \n",
"30863842 {'github': ['github.com/pfruan/abSNF.']} \n",
"30957844 {'cran': ['SmCCNet/index.html.']} \n",
"\n",
" journal \n",
"uid \n",
"17468122 Bioinformatics (Oxford, England) \n",
"19706745 Bioinformatics (Oxford, England) \n",
"26568631 Bioinformatics (Oxford, England) \n",
"27814671 BMC genomics \n",
"28961954 Bioinformatics (Oxford, England) \n",
"29931190 Bioinformatics (Oxford, England) \n",
"30202885 Bioinformatics (Oxford, England) \n",
"30598101 BMC genomics \n",
"30863842 Bioinformatics (Oxford, England) \n",
"30957844 Bioinformatics (Oxford, England) "
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"get_statments(r'is freely available (.*?)\\n')"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>match</th>\n",
" <th>detected_code_links</th>\n",
" <th>journal</th>\n",
" </tr>\n",
" <tr>\n",
" <th>uid</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>26377073</th>\n",
" <td>https://github.com/yangzi4/inmf.</td>\n",
" <td>{'github': ['github.com/yangzi4/iNMF.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26961692</th>\n",
" <td>https://github.com/maxconway/snftool</td>\n",
" <td>{'github': ['github.com/maxconway/SNFtool']}</td>\n",
" <td>BMC bioinformatics</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28082455</th>\n",
" <td>https://github.com/olganikolova/gbgfa.</td>\n",
" <td>{'github': ['github.com/olganikolova/gbgfa.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28640810</th>\n",
" <td>https://gitlab.com/gustafsson-lab/lassim.</td>\n",
" <td>{'gitlab': ['gitlab.com/Gustafsson-lab/lassim.']}</td>\n",
" <td>PLoS computational biology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28961954</th>\n",
" <td>https://github.com/ronandaly/pimp</td>\n",
" <td>{'github': ['github.com/RonanDaly/pimp']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29069501</th>\n",
" <td>https://jmorp.megabank.tohoku.ac.jp.</td>\n",
" <td>{}</td>\n",
" <td>Nucleic acids research</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29186355</th>\n",
" <td>https://github.com/mehr-een/bemkl-rbps.</td>\n",
" <td>{'github': ['github.com/mehr-een/bemkl-rbps.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29547932</th>\n",
" <td>https://github.com/cbg-ethz/netics.</td>\n",
" <td>{'github': ['github.com/cbg-ethz/netics.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29931190</th>\n",
" <td>https://github.com/mkanai/grimon</td>\n",
" <td>{'github': ['github.com/mkanai/grimon']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30202885</th>\n",
" <td>https://github.com/xuesidong/tobmi.</td>\n",
" <td>{'github': ['github.com/XuesiDong/TOBMI.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30556956</th>\n",
" <td>https://www.omicsnet.ca.</td>\n",
" <td>{}</td>\n",
" <td>Current protocols in bioinformatics</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30634897</th>\n",
" <td>https://sourceforge.net/projects/ms-helios/</td>\n",
" <td>{'sourceforge': ['sourceforge.net/projects/ms-helios/']}</td>\n",
" <td>BMC bioinformatics</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30768150</th>\n",
" <td>https://github.com/roosevelt-pku/drugcombinationprediction.</td>\n",
" <td>{'github': ['github.com/Roosevelt-PKU/drugcombinationprediction.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31092193</th>\n",
" <td>https://pathme.scai.fraunhofer.de/</td>\n",
" <td>{}</td>\n",
" <td>BMC bioinformatics</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31406990</th>\n",
" <td>https://github.com/vgteam/vg,</td>\n",
" <td>{'github': ['github.com/vgteam/vg,', 'github.com/jltsiren/gbwt', 'github.com/jltsiren/gcsa2.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31583639</th>\n",
" <td>https://keypathwayminer.compbio.sdu.dk</td>\n",
" <td>{}</td>\n",
" <td>Methods in molecular biology (Clifton, N.J.)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31642488</th>\n",
" <td>https://ccsm.uth.edu/exonskipdb/,</td>\n",
" <td>{}</td>\n",
" <td>Nucleic acids research</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31649722</th>\n",
" <td>https://targetmine.mizuguchilab.org.</td>\n",
" <td>{'github': ['github.com/chenyian-nibio/targetmine-gradle.']}</td>\n",
" <td>Frontiers in genetics</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31693075</th>\n",
" <td>https://github.com/yaluwen/omicpred.</td>\n",
" <td>{'github': ['github.com/YaluWen/OmicPred.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31913435</th>\n",
" <td>https://github.com/yaluwen/uomic.</td>\n",
" <td>{'github': ['github.com/YaluWen/Uomic.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32396365</th>\n",
" <td>https://zenodo.org/</td>\n",
" <td>{}</td>\n",
" <td>Journal of proteome research</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32657382</th>\n",
" <td>https://github.com/caokai1073/unioncom.</td>\n",
" <td>{'github': ['github.com/caokai1073/UnionCom.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32692809</th>\n",
" <td>https://github.com/cheminfo/compass.</td>\n",
" <td>{'github': ['github.com/cheminfo/COMPASS.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" match \\\n",
"uid \n",
"26377073 https://github.com/yangzi4/inmf. \n",
"26961692 https://github.com/maxconway/snftool \n",
"28082455 https://github.com/olganikolova/gbgfa. \n",
"28640810 https://gitlab.com/gustafsson-lab/lassim. \n",
"28961954 https://github.com/ronandaly/pimp \n",
"29069501 https://jmorp.megabank.tohoku.ac.jp. \n",
"29186355 https://github.com/mehr-een/bemkl-rbps. \n",
"29547932 https://github.com/cbg-ethz/netics. \n",
"29931190 https://github.com/mkanai/grimon \n",
"30202885 https://github.com/xuesidong/tobmi. \n",
"30556956 https://www.omicsnet.ca. \n",
"30634897 https://sourceforge.net/projects/ms-helios/ \n",
"30768150 https://github.com/roosevelt-pku/drugcombinationprediction. \n",
"31092193 https://pathme.scai.fraunhofer.de/ \n",
"31406990 https://github.com/vgteam/vg, \n",
"31583639 https://keypathwayminer.compbio.sdu.dk \n",
"31642488 https://ccsm.uth.edu/exonskipdb/, \n",
"31649722 https://targetmine.mizuguchilab.org. \n",
"31693075 https://github.com/yaluwen/omicpred. \n",
"31913435 https://github.com/yaluwen/uomic. \n",
"32396365 https://zenodo.org/ \n",
"32657382 https://github.com/caokai1073/unioncom. \n",
"32692809 https://github.com/cheminfo/compass. \n",
"\n",
" detected_code_links \\\n",
"uid \n",
"26377073 {'github': ['github.com/yangzi4/iNMF.']} \n",
"26961692 {'github': ['github.com/maxconway/SNFtool']} \n",
"28082455 {'github': ['github.com/olganikolova/gbgfa.']} \n",
"28640810 {'gitlab': ['gitlab.com/Gustafsson-lab/lassim.']} \n",
"28961954 {'github': ['github.com/RonanDaly/pimp']} \n",
"29069501 {} \n",
"29186355 {'github': ['github.com/mehr-een/bemkl-rbps.']} \n",
"29547932 {'github': ['github.com/cbg-ethz/netics.']} \n",
"29931190 {'github': ['github.com/mkanai/grimon']} \n",
"30202885 {'github': ['github.com/XuesiDong/TOBMI.']} \n",
"30556956 {} \n",
"30634897 {'sourceforge': ['sourceforge.net/projects/ms-helios/']} \n",
"30768150 {'github': ['github.com/Roosevelt-PKU/drugcombinationprediction.']} \n",
"31092193 {} \n",
"31406990 {'github': ['github.com/vgteam/vg,', 'github.com/jltsiren/gbwt', 'github.com/jltsiren/gcsa2.']} \n",
"31583639 {} \n",
"31642488 {} \n",
"31649722 {'github': ['github.com/chenyian-nibio/targetmine-gradle.']} \n",
"31693075 {'github': ['github.com/YaluWen/OmicPred.']} \n",
"31913435 {'github': ['github.com/YaluWen/Uomic.']} \n",
"32396365 {} \n",
"32657382 {'github': ['github.com/caokai1073/UnionCom.']} \n",
"32692809 {'github': ['github.com/cheminfo/COMPASS.']} \n",
"\n",
" journal \n",
"uid \n",
"26377073 Bioinformatics (Oxford, England) \n",
"26961692 BMC bioinformatics \n",
"28082455 Bioinformatics (Oxford, England) \n",
"28640810 PLoS computational biology \n",
"28961954 Bioinformatics (Oxford, England) \n",
"29069501 Nucleic acids research \n",
"29186355 Bioinformatics (Oxford, England) \n",
"29547932 Bioinformatics (Oxford, England) \n",
"29931190 Bioinformatics (Oxford, England) \n",
"30202885 Bioinformatics (Oxford, England) \n",
"30556956 Current protocols in bioinformatics \n",
"30634897 BMC bioinformatics \n",
"30768150 Bioinformatics (Oxford, England) \n",
"31092193 BMC bioinformatics \n",
"31406990 Bioinformatics (Oxford, England) \n",
"31583639 Methods in molecular biology (Clifton, N.J.) \n",
"31642488 Nucleic acids research \n",
"31649722 Frontiers in genetics \n",
"31693075 Bioinformatics (Oxford, England) \n",
"31913435 Bioinformatics (Oxford, England) \n",
"32396365 Journal of proteome research \n",
"32657382 Bioinformatics (Oxford, England) \n",
"32692809 Bioinformatics (Oxford, England) "
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"get_statments('available at (https.*?)\\s')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Bioinformatics (Oxford)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>match</th>\n",
" <th>detected_code_links</th>\n",
" <th>journal</th>\n",
" </tr>\n",
" <tr>\n",
" <th>uid</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>25527095</th>\n",
" <td>the netgestalt crc portal can be freely accessed at http://www.netgestalt.org.</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26377073</th>\n",
" <td>the source code repository is publicly available at https://github.com/yangzi4/inmf.</td>\n",
" <td>{'github': ['github.com/yangzi4/iNMF.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26568631</th>\n",
" <td>chronos is freely available at http://biosignal.med.upatras.gr/chronos/</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26787660</th>\n",
" <td>genetrail2 can be freely accessed under https://genetrail2.bioinf.uni-sb.de</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26883487</th>\n",
" <td>the source code, required data for prediction, and demo data for test are freely available at: h...</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27797774</th>\n",
" <td>http://metagenomics.atc.tcs.com/webigloo and http://121.241.184.233/webigloo [freely available f...</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28082455</th>\n",
" <td>: the code for this work is available at https://github.com/olganikolova/gbgfa.</td>\n",
" <td>{'github': ['github.com/olganikolova/gbgfa.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28334215</th>\n",
" <td>matlab codes for flux balance analysis in this study are available in supplementary material.</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28407042</th>\n",
" <td>the source code is at https://github.com/zhangxf-ccnu/pdna.</td>\n",
" <td>{'github': ['github.com/Zhangxf-ccnu/pDNA.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28520848</th>\n",
" <td>pfa has been implemented as a matlab package, which is available at http://www.sysbio.ac.cn/cb/c...</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28605519</th>\n",
" <td>the package is implemented in r and available under gpl-2 license from the bioconductor website ...</td>\n",
" <td>{'bioconductor': ['CancerSubtypes/).']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28961954</th>\n",
" <td>pimp is available at http://polyomics.mvls.gla.ac.uk, and access is freely available on request....</td>\n",
" <td>{'github': ['github.com/RonanDaly/pimp']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29069296</th>\n",
" <td>https://kuppal.shinyapps.io/xmwas (online) and https://github.com/kuppal2/xmwas/ (r).</td>\n",
" <td>{'github': ['github.com/kuppal2/xMWAS/']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29077792</th>\n",
" <td>proposed methods are available in the r package mixkernel, released on cran. it is fully compati...</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29186328</th>\n",
" <td>an r package metapca is available online. (http://tsenglab.biostat.pitt.edu/software.htm).</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29186355</th>\n",
" <td>processed datasets, r as well as matlab implementations of the methods are available at https://...</td>\n",
" <td>{'github': ['github.com/mehr-een/bemkl-rbps.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29547932</th>\n",
" <td>netics is available at https://github.com/cbg-ethz/netics.</td>\n",
" <td>{'github': ['github.com/cbg-ethz/netics.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29590301</th>\n",
" <td>genease can be accessed freely at http://research.cchmc.org/mershalab/genease/login.html.</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29912365</th>\n",
" <td>software and vignettes are available at http://github.com/kevinvervier/tisan.</td>\n",
" <td>{'github': ['github.com/kevinVervier/TiSAn.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29931190</th>\n",
" <td>grimon is freely available at https://github.com/mkanai/grimon as an r package with example omic...</td>\n",
" <td>{'github': ['github.com/mkanai/grimon']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29939219</th>\n",
" <td>our r source code is available online at https://github.com/angy89/robustclusteringpatientsubtyp...</td>\n",
" <td>{'github': ['github.com/angy89/RobustClusteringPatientSubtyping.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30059974</th>\n",
" <td>discoversl package with user manual and sample workflow is available for download from github ur...</td>\n",
" <td>{'github': ['github.com/shaoli86/DiscoverSL/releases/tag/V1.0']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30184058</th>\n",
" <td>freely available on the github at https://github.com/chpgenetics/fbm.</td>\n",
" <td>{'github': ['github.com/CHPGenetics/FBM.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30202885</th>\n",
" <td>tobmiknn is freely available at https://github.com/xuesidong/tobmi.</td>\n",
" <td>{'github': ['github.com/XuesiDong/TOBMI.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30239606</th>\n",
" <td>we have implemented the proposed methods in an r package freely available at http://www.github.c...</td>\n",
" <td>{'github': ['github.com/baolinwu/MSKAT.'], '.git': ['http://www.github.com/baolinwu/MSKAT.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30295701</th>\n",
" <td>https://folk.uio.no/hadift/miv/ [user/pass via hadift@medisin. uio.no].</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30329022</th>\n",
" <td>https://github.com/michaelsharpnack/grassmanncluster.</td>\n",
" <td>{'github': ['github.com/michaelsharpnack/GrassmannCluster.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30358822</th>\n",
" <td>modreg is freely accessible at http://cis.hku.hk/software/modreg.tar.gz.</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30423084</th>\n",
" <td>an implementation of our methodology is available in the r package itop on cran. additionally, a...</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30561547</th>\n",
" <td>datasets and scripts for reproduction of results are available through: https://nalab.stanford.e...</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30596886</th>\n",
" <td>r package at the b2slab site: http://b2slab.upc.edu/software-and-tutorials/ and python package: ...</td>\n",
" <td>{'pypi': ['multiview.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30657866</th>\n",
" <td>diablo is implemented in the mixomics r bioconductor package with functions for parameters' choi...</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30668675</th>\n",
" <td>the reprogenomics viewer resource is freely accessible at http://rgv.genouest.org. the website i...</td>\n",
" <td>{'github': ['github.com/fchalmel/RGV.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30698637</th>\n",
" <td>code for nemo and for reproducing all nemo results in this paper is in github: https://github.co...</td>\n",
" <td>{'github': ['github.com/Shamir-Lab/NEMO.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30768150</th>\n",
" <td>drugcomboexplorer is available at https://github.com/roosevelt-pku/drugcombinationprediction.</td>\n",
" <td>{'github': ['github.com/Roosevelt-PKU/drugcombinationprediction.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30825303</th>\n",
" <td>prediction results and a reference implementation of dgmdl in python is available on https://git...</td>\n",
" <td>{'github': ['github.com/luoping1004/dgMDL.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30863842</th>\n",
" <td>the r package absnf is freely available for downloading from https://github.com/pfruan/absnf.</td>\n",
" <td>{'github': ['github.com/pfruan/abSNF.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30957844</th>\n",
" <td>the smccnet algorithm is written in r, and is freely available on the web at https://cran.r-proj...</td>\n",
" <td>{'cran': ['SmCCNet/index.html.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31141125</th>\n",
" <td>cnet can be downloaded at https://github.com/bsml320/cnet.</td>\n",
" <td>{'github': ['github.com/bsml320/CNet.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31406990</th>\n",
" <td>our software is available at https://github.com/vgteam/vg, https://github.com/jltsiren/gbwt and ...</td>\n",
" <td>{'github': ['github.com/vgteam/vg,', 'github.com/jltsiren/gbwt', 'github.com/jltsiren/gcsa2.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31501851</th>\n",
" <td>spectrum is available as an r software package from cran https://cran.r-project.org/web/packages...</td>\n",
" <td>{'cran': ['Spectrum/index.html.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31504182</th>\n",
" <td>the r package 'mnd' is available at url: https://www.itb.cnr.it/mnd.</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31504184</th>\n",
" <td>the lucid method is implemented through the lucidus r package available on cran (https://cran.r-...</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31510700</th>\n",
" <td>https://github.com/hosseinshn/moli.</td>\n",
" <td>{'github': ['github.com/hosseinshn/MOLI.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31693075</th>\n",
" <td>the r-package is available at https://github.com/yaluwen/omicpred.</td>\n",
" <td>{'github': ['github.com/YaluWen/OmicPred.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31693090</th>\n",
" <td>code can be downloaded from https://github.com/zoesgithub/mtbnn.</td>\n",
" <td>{'github': ['github.com/Zoesgithub/MtBNN.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31873721</th>\n",
" <td>the raw files of metagenomics, metabolomics, and transcriptomics data can be accessed at ncbi sr...</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31913435</th>\n",
" <td>the r-package is available at https://github.com/yaluwen/uomic.</td>\n",
" <td>{'github': ['github.com/YaluWen/Uomic.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31960894</th>\n",
" <td>the sismonr package is implemented in r and julia and is publicly available on the cran reposito...</td>\n",
" <td>{'.git': ['https://oliviaab.github.io/sismonr/.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32176258</th>\n",
" <td>m2ia is public available at http://m2ia.met-bioinformatics.cn.</td>\n",
" <td>{}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32657371</th>\n",
" <td>https://github.com/hosseinshn/aitl.</td>\n",
" <td>{'github': ['github.com/hosseinshn/AITL.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32657382</th>\n",
" <td>unioncom software is available at https://github.com/caokai1073/unioncom.</td>\n",
" <td>{'github': ['github.com/caokai1073/UnionCom.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32692809</th>\n",
" <td>source code is available at https://github.com/cheminfo/compass.</td>\n",
" <td>{'github': ['github.com/cheminfo/COMPASS.']}</td>\n",
" <td>Bioinformatics (Oxford, England)</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" match \\\n",
"uid \n",
"25527095 the netgestalt crc portal can be freely accessed at http://www.netgestalt.org. \n",
"26377073 the source code repository is publicly available at https://github.com/yangzi4/inmf. \n",
"26568631 chronos is freely available at http://biosignal.med.upatras.gr/chronos/ \n",
"26787660 genetrail2 can be freely accessed under https://genetrail2.bioinf.uni-sb.de \n",
"26883487 the source code, required data for prediction, and demo data for test are freely available at: h... \n",
"27797774 http://metagenomics.atc.tcs.com/webigloo and http://121.241.184.233/webigloo [freely available f... \n",
"28082455 : the code for this work is available at https://github.com/olganikolova/gbgfa. \n",
"28334215 matlab codes for flux balance analysis in this study are available in supplementary material. \n",
"28407042 the source code is at https://github.com/zhangxf-ccnu/pdna. \n",
"28520848 pfa has been implemented as a matlab package, which is available at http://www.sysbio.ac.cn/cb/c... \n",
"28605519 the package is implemented in r and available under gpl-2 license from the bioconductor website ... \n",
"28961954 pimp is available at http://polyomics.mvls.gla.ac.uk, and access is freely available on request.... \n",
"29069296 https://kuppal.shinyapps.io/xmwas (online) and https://github.com/kuppal2/xmwas/ (r). \n",
"29077792 proposed methods are available in the r package mixkernel, released on cran. it is fully compati... \n",
"29186328 an r package metapca is available online. (http://tsenglab.biostat.pitt.edu/software.htm). \n",
"29186355 processed datasets, r as well as matlab implementations of the methods are available at https://... \n",
"29547932 netics is available at https://github.com/cbg-ethz/netics. \n",
"29590301 genease can be accessed freely at http://research.cchmc.org/mershalab/genease/login.html. \n",
"29912365 software and vignettes are available at http://github.com/kevinvervier/tisan. \n",
"29931190 grimon is freely available at https://github.com/mkanai/grimon as an r package with example omic... \n",
"29939219 our r source code is available online at https://github.com/angy89/robustclusteringpatientsubtyp... \n",
"30059974 discoversl package with user manual and sample workflow is available for download from github ur... \n",
"30184058 freely available on the github at https://github.com/chpgenetics/fbm. \n",
"30202885 tobmiknn is freely available at https://github.com/xuesidong/tobmi. \n",
"30239606 we have implemented the proposed methods in an r package freely available at http://www.github.c... \n",
"30295701 https://folk.uio.no/hadift/miv/ [user/pass via hadift@medisin. uio.no]. \n",
"30329022 https://github.com/michaelsharpnack/grassmanncluster. \n",
"30358822 modreg is freely accessible at http://cis.hku.hk/software/modreg.tar.gz. \n",
"30423084 an implementation of our methodology is available in the r package itop on cran. additionally, a... \n",
"30561547 datasets and scripts for reproduction of results are available through: https://nalab.stanford.e... \n",
"30596886 r package at the b2slab site: http://b2slab.upc.edu/software-and-tutorials/ and python package: ... \n",
"30657866 diablo is implemented in the mixomics r bioconductor package with functions for parameters' choi... \n",
"30668675 the reprogenomics viewer resource is freely accessible at http://rgv.genouest.org. the website i... \n",
"30698637 code for nemo and for reproducing all nemo results in this paper is in github: https://github.co... \n",
"30768150 drugcomboexplorer is available at https://github.com/roosevelt-pku/drugcombinationprediction. \n",
"30825303 prediction results and a reference implementation of dgmdl in python is available on https://git... \n",
"30863842 the r package absnf is freely available for downloading from https://github.com/pfruan/absnf. \n",
"30957844 the smccnet algorithm is written in r, and is freely available on the web at https://cran.r-proj... \n",
"31141125 cnet can be downloaded at https://github.com/bsml320/cnet. \n",
"31406990 our software is available at https://github.com/vgteam/vg, https://github.com/jltsiren/gbwt and ... \n",
"31501851 spectrum is available as an r software package from cran https://cran.r-project.org/web/packages... \n",
"31504182 the r package 'mnd' is available at url: https://www.itb.cnr.it/mnd. \n",
"31504184 the lucid method is implemented through the lucidus r package available on cran (https://cran.r-... \n",
"31510700 https://github.com/hosseinshn/moli. \n",
"31693075 the r-package is available at https://github.com/yaluwen/omicpred. \n",
"31693090 code can be downloaded from https://github.com/zoesgithub/mtbnn. \n",
"31873721 the raw files of metagenomics, metabolomics, and transcriptomics data can be accessed at ncbi sr... \n",
"31913435 the r-package is available at https://github.com/yaluwen/uomic. \n",
"31960894 the sismonr package is implemented in r and julia and is publicly available on the cran reposito... \n",
"32176258 m2ia is public available at http://m2ia.met-bioinformatics.cn. \n",
"32657371 https://github.com/hosseinshn/aitl. \n",
"32657382 unioncom software is available at https://github.com/caokai1073/unioncom. \n",
"32692809 source code is available at https://github.com/cheminfo/compass. \n",
"\n",
" detected_code_links \\\n",
"uid \n",
"25527095 {} \n",
"26377073 {'github': ['github.com/yangzi4/iNMF.']} \n",
"26568631 {} \n",
"26787660 {} \n",
"26883487 {} \n",
"27797774 {} \n",
"28082455 {'github': ['github.com/olganikolova/gbgfa.']} \n",
"28334215 {} \n",
"28407042 {'github': ['github.com/Zhangxf-ccnu/pDNA.']} \n",
"28520848 {} \n",
"28605519 {'bioconductor': ['CancerSubtypes/).']} \n",
"28961954 {'github': ['github.com/RonanDaly/pimp']} \n",
"29069296 {'github': ['github.com/kuppal2/xMWAS/']} \n",
"29077792 {} \n",
"29186328 {} \n",
"29186355 {'github': ['github.com/mehr-een/bemkl-rbps.']} \n",
"29547932 {'github': ['github.com/cbg-ethz/netics.']} \n",
"29590301 {} \n",
"29912365 {'github': ['github.com/kevinVervier/TiSAn.']} \n",
"29931190 {'github': ['github.com/mkanai/grimon']} \n",
"29939219 {'github': ['github.com/angy89/RobustClusteringPatientSubtyping.']} \n",
"30059974 {'github': ['github.com/shaoli86/DiscoverSL/releases/tag/V1.0']} \n",
"30184058 {'github': ['github.com/CHPGenetics/FBM.']} \n",
"30202885 {'github': ['github.com/XuesiDong/TOBMI.']} \n",
"30239606 {'github': ['github.com/baolinwu/MSKAT.'], '.git': ['http://www.github.com/baolinwu/MSKAT.']} \n",
"30295701 {} \n",
"30329022 {'github': ['github.com/michaelsharpnack/GrassmannCluster.']} \n",
"30358822 {} \n",
"30423084 {} \n",
"30561547 {} \n",
"30596886 {'pypi': ['multiview.']} \n",
"30657866 {} \n",
"30668675 {'github': ['github.com/fchalmel/RGV.']} \n",
"30698637 {'github': ['github.com/Shamir-Lab/NEMO.']} \n",
"30768150 {'github': ['github.com/Roosevelt-PKU/drugcombinationprediction.']} \n",
"30825303 {'github': ['github.com/luoping1004/dgMDL.']} \n",
"30863842 {'github': ['github.com/pfruan/abSNF.']} \n",
"30957844 {'cran': ['SmCCNet/index.html.']} \n",
"31141125 {'github': ['github.com/bsml320/CNet.']} \n",
"31406990 {'github': ['github.com/vgteam/vg,', 'github.com/jltsiren/gbwt', 'github.com/jltsiren/gcsa2.']} \n",
"31501851 {'cran': ['Spectrum/index.html.']} \n",
"31504182 {} \n",
"31504184 {} \n",
"31510700 {'github': ['github.com/hosseinshn/MOLI.']} \n",
"31693075 {'github': ['github.com/YaluWen/OmicPred.']} \n",
"31693090 {'github': ['github.com/Zoesgithub/MtBNN.']} \n",
"31873721 {} \n",
"31913435 {'github': ['github.com/YaluWen/Uomic.']} \n",
"31960894 {'.git': ['https://oliviaab.github.io/sismonr/.']} \n",
"32176258 {} \n",
"32657371 {'github': ['github.com/hosseinshn/AITL.']} \n",
"32657382 {'github': ['github.com/caokai1073/UnionCom.']} \n",
"32692809 {'github': ['github.com/cheminfo/COMPASS.']} \n",
"\n",
" journal \n",
"uid \n",
"25527095 Bioinformatics (Oxford, England) \n",
"26377073 Bioinformatics (Oxford, England) \n",
"26568631 Bioinformatics (Oxford, England) \n",
"26787660 Bioinformatics (Oxford, England) \n",
"26883487 Bioinformatics (Oxford, England) \n",
"27797774 Bioinformatics (Oxford, England) \n",
"28082455 Bioinformatics (Oxford, England) \n",
"28334215 Bioinformatics (Oxford, England) \n",
"28407042 Bioinformatics (Oxford, England) \n",
"28520848 Bioinformatics (Oxford, England) \n",
"28605519 Bioinformatics (Oxford, England) \n",
"28961954 Bioinformatics (Oxford, England) \n",
"29069296 Bioinformatics (Oxford, England) \n",
"29077792 Bioinformatics (Oxford, England) \n",
"29186328 Bioinformatics (Oxford, England) \n",
"29186355 Bioinformatics (Oxford, England) \n",
"29547932 Bioinformatics (Oxford, England) \n",
"29590301 Bioinformatics (Oxford, England) \n",
"29912365 Bioinformatics (Oxford, England) \n",
"29931190 Bioinformatics (Oxford, England) \n",
"29939219 Bioinformatics (Oxford, England) \n",
"30059974 Bioinformatics (Oxford, England) \n",
"30184058 Bioinformatics (Oxford, England) \n",
"30202885 Bioinformatics (Oxford, England) \n",
"30239606 Bioinformatics (Oxford, England) \n",
"30295701 Bioinformatics (Oxford, England) \n",
"30329022 Bioinformatics (Oxford, England) \n",
"30358822 Bioinformatics (Oxford, England) \n",
"30423084 Bioinformatics (Oxford, England) \n",
"30561547 Bioinformatics (Oxford, England) \n",
"30596886 Bioinformatics (Oxford, England) \n",
"30657866 Bioinformatics (Oxford, England) \n",
"30668675 Bioinformatics (Oxford, England) \n",
"30698637 Bioinformatics (Oxford, England) \n",
"30768150 Bioinformatics (Oxford, England) \n",
"30825303 Bioinformatics (Oxford, England) \n",
"30863842 Bioinformatics (Oxford, England) \n",
"30957844 Bioinformatics (Oxford, England) \n",
"31141125 Bioinformatics (Oxford, England) \n",
"31406990 Bioinformatics (Oxford, England) \n",
"31501851 Bioinformatics (Oxford, England) \n",
"31504182 Bioinformatics (Oxford, England) \n",
"31504184 Bioinformatics (Oxford, England) \n",
"31510700 Bioinformatics (Oxford, England) \n",
"31693075 Bioinformatics (Oxford, England) \n",
"31693090 Bioinformatics (Oxford, England) \n",
"31873721 Bioinformatics (Oxford, England) \n",
"31913435 Bioinformatics (Oxford, England) \n",
"31960894 Bioinformatics (Oxford, England) \n",
"32176258 Bioinformatics (Oxford, England) \n",
"32657371 Bioinformatics (Oxford, England) \n",
"32657382 Bioinformatics (Oxford, England) \n",
"32692809 Bioinformatics (Oxford, England) "
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"header = 'availability and implementation'\n",
"\n",
"get_statments(f'{escape(header)}\\n(.*?)\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Store the data"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"Stored `code_repositories` (C557967A → 5FF4AA2D) at Saturday, 25. Jul 2020 15:56"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {
"text/markdown": {
"action": "store",
"command": "store code_repositories in pubmed_derived_data",
"finished": "2020-07-25T15:56:26.412470",
"finished_human_readable": "Saturday, 25. Jul 2020 15:56",
"result": [
{
"new_file": {
"crc32": "5FF4AA2D",
"sha256": "92B28FE95EA205C4311BD4E9D6360D87087D0C5D452CCF9567829CFFD27EE1E5"
},
"old_file": {
"crc32": "C557967A",
"sha256": "53E92545F8C164C57DD36D4DD7B4823D960B0C576524740753A902E59D8E438C"
},
"subject": "code_repositories"
}
],
"started": "2020-07-25T15:56:25.425898"
}
},
"output_type": "display_data"
}
],
"source": [
"%vault store code_repositories in pubmed_derived_data"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}